mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-14 21:37:52 -07:00
Merge pull request #9566 from Sintendo/jit64divwx
Jit64: Optimize divwx
This commit is contained in:
commit
15ebb1d9e4
@ -430,6 +430,8 @@ add_library(core
|
||||
PowerPC/Interpreter/Interpreter_Tables.cpp
|
||||
PowerPC/Interpreter/Interpreter.cpp
|
||||
PowerPC/Interpreter/Interpreter.h
|
||||
PowerPC/JitCommon/DivUtils.cpp
|
||||
PowerPC/JitCommon/DivUtils.h
|
||||
PowerPC/JitCommon/JitAsmCommon.cpp
|
||||
PowerPC/JitCommon/JitAsmCommon.h
|
||||
PowerPC/JitCommon/JitBase.cpp
|
||||
|
@ -96,7 +96,7 @@ public:
|
||||
|
||||
void GenerateConstantOverflow(bool overflow);
|
||||
void GenerateConstantOverflow(s64 val);
|
||||
void GenerateOverflow();
|
||||
void GenerateOverflow(Gen::CCFlags cond = Gen::CCFlags::CC_NO);
|
||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||
void FinalizeCarry(Gen::CCFlags cond);
|
||||
void FinalizeCarry(bool ca);
|
||||
|
@ -16,10 +16,12 @@
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
#include "Core/PowerPC/JitCommon/DivUtils.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
using namespace Gen;
|
||||
using namespace JitCommon;
|
||||
|
||||
void Jit64::GenerateConstantOverflow(s64 val)
|
||||
{
|
||||
@ -42,9 +44,9 @@ void Jit64::GenerateConstantOverflow(bool overflow)
|
||||
}
|
||||
|
||||
// We could do overflow branchlessly, but unlike carry it seems to be quite a bit rarer.
|
||||
void Jit64::GenerateOverflow()
|
||||
void Jit64::GenerateOverflow(Gen::CCFlags cond)
|
||||
{
|
||||
FixupBranch jno = J_CC(CC_NO);
|
||||
FixupBranch jno = J_CC(cond);
|
||||
// XER[OV/SO] = 1
|
||||
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
|
||||
FixupBranch exit = J();
|
||||
@ -1342,6 +1344,207 @@ void Jit64::divwx(UGeckoInstruction inst)
|
||||
GenerateConstantOverflow(false);
|
||||
}
|
||||
}
|
||||
else if (gpr.IsImm(a))
|
||||
{
|
||||
// Constant dividend
|
||||
const u32 dividend = gpr.Imm32(a);
|
||||
|
||||
if (dividend == 0)
|
||||
{
|
||||
if (inst.OE)
|
||||
{
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
RegCache::Realize(Rb);
|
||||
|
||||
CMP_or_TEST(32, Rb, Imm32(0));
|
||||
GenerateOverflow(CC_NZ);
|
||||
}
|
||||
|
||||
// Zero divided by anything is always zero
|
||||
gpr.SetImmediate32(d, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
RCX64Reg Rb = gpr.Bind(b, RCMode::Read);
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
// no register choice
|
||||
RCX64Reg eax = gpr.Scratch(EAX);
|
||||
RCX64Reg edx = gpr.Scratch(EDX);
|
||||
RegCache::Realize(Rb, Rd, eax, edx);
|
||||
|
||||
// Check for divisor == 0
|
||||
TEST(32, Rb, Rb);
|
||||
|
||||
FixupBranch normal_path;
|
||||
|
||||
if (dividend == 0x80000000)
|
||||
{
|
||||
// Divisor is 0, proceed to overflow case
|
||||
const FixupBranch overflow = J_CC(CC_Z);
|
||||
// Otherwise, check for divisor == -1
|
||||
CMP(32, Rb, Imm32(0xFFFFFFFF));
|
||||
normal_path = J_CC(CC_NE);
|
||||
|
||||
SetJumpTarget(overflow);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Divisor is not 0, take normal path
|
||||
normal_path = J_CC(CC_NZ);
|
||||
// Otherwise, proceed to overflow case
|
||||
}
|
||||
|
||||
// Set Rd to all ones or all zeroes
|
||||
if (dividend & 0x80000000)
|
||||
MOV(32, Rd, Imm32(0xFFFFFFFF));
|
||||
else
|
||||
XOR(32, Rd, Rd);
|
||||
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(true);
|
||||
|
||||
const FixupBranch done = J();
|
||||
|
||||
SetJumpTarget(normal_path);
|
||||
|
||||
MOV(32, eax, Imm32(dividend));
|
||||
CDQ();
|
||||
IDIV(32, Rb);
|
||||
MOV(32, Rd, eax);
|
||||
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(false);
|
||||
|
||||
SetJumpTarget(done);
|
||||
}
|
||||
}
|
||||
else if (gpr.IsImm(b))
|
||||
{
|
||||
// Constant divisor
|
||||
const s32 divisor = gpr.SImm32(b);
|
||||
RCOpArg Ra = gpr.Use(a, RCMode::Read);
|
||||
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Ra, Rd);
|
||||
|
||||
// Handle 0, 1, and -1 explicitly
|
||||
if (divisor == 0)
|
||||
{
|
||||
if (d != a)
|
||||
MOV(32, Rd, Ra);
|
||||
SAR(32, Rd, Imm8(31));
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(true);
|
||||
}
|
||||
else if (divisor == 1)
|
||||
{
|
||||
if (d != a)
|
||||
MOV(32, Rd, Ra);
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(false);
|
||||
}
|
||||
else if (divisor == -1)
|
||||
{
|
||||
if (d != a)
|
||||
MOV(32, Rd, Ra);
|
||||
|
||||
NEG(32, Rd);
|
||||
const FixupBranch normal = J_CC(CC_NO);
|
||||
|
||||
MOV(32, Rd, Imm32(0xFFFFFFFF));
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(true);
|
||||
const FixupBranch done = J();
|
||||
|
||||
SetJumpTarget(normal);
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(false);
|
||||
|
||||
SetJumpTarget(done);
|
||||
}
|
||||
else if (divisor == 2 || divisor == -2)
|
||||
{
|
||||
X64Reg tmp = RSCRATCH;
|
||||
if (Ra.IsSimpleReg() && Ra.GetSimpleReg() != Rd)
|
||||
tmp = Ra.GetSimpleReg();
|
||||
else
|
||||
MOV(32, R(tmp), Ra);
|
||||
|
||||
MOV(32, Rd, R(tmp));
|
||||
SHR(32, Rd, Imm8(31));
|
||||
ADD(32, Rd, R(tmp));
|
||||
SAR(32, Rd, Imm8(1));
|
||||
|
||||
if (divisor < 0)
|
||||
NEG(32, Rd);
|
||||
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(false);
|
||||
}
|
||||
else if (MathUtil::IsPow2(divisor) || MathUtil::IsPow2(-divisor))
|
||||
{
|
||||
u32 abs_val = std::abs(divisor);
|
||||
|
||||
X64Reg tmp = RSCRATCH;
|
||||
if (Ra.IsSimpleReg() && Ra.GetSimpleReg() != Rd)
|
||||
tmp = Ra.GetSimpleReg();
|
||||
else
|
||||
MOV(32, R(tmp), Ra);
|
||||
|
||||
TEST(32, R(tmp), R(tmp));
|
||||
LEA(32, Rd, MDisp(tmp, abs_val - 1));
|
||||
CMOVcc(32, Rd, R(tmp), CC_NS);
|
||||
SAR(32, Rd, Imm8(IntLog2(abs_val)));
|
||||
|
||||
if (divisor < 0)
|
||||
NEG(32, Rd);
|
||||
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Optimize signed 32-bit integer division by a constant
|
||||
Magic m = SignedDivisionConstants(divisor);
|
||||
|
||||
MOVSX(64, 32, RSCRATCH, Ra);
|
||||
|
||||
if (divisor > 0 && m.multiplier < 0)
|
||||
{
|
||||
IMUL(64, Rd, R(RSCRATCH), Imm32(m.multiplier));
|
||||
SHR(64, Rd, Imm8(32));
|
||||
ADD(32, Rd, R(RSCRATCH));
|
||||
SHR(32, R(RSCRATCH), Imm8(31));
|
||||
SAR(32, Rd, Imm8(m.shift));
|
||||
}
|
||||
else if (divisor < 0 && m.multiplier > 0)
|
||||
{
|
||||
IMUL(64, Rd, R(RSCRATCH), Imm32(m.multiplier));
|
||||
SHR(64, R(RSCRATCH), Imm8(32));
|
||||
SUB(32, R(RSCRATCH), Rd);
|
||||
MOV(32, Rd, R(RSCRATCH));
|
||||
SHR(32, Rd, Imm8(31));
|
||||
SAR(32, R(RSCRATCH), Imm8(m.shift));
|
||||
}
|
||||
else if (m.multiplier > 0)
|
||||
{
|
||||
IMUL(64, Rd, R(RSCRATCH), Imm32(m.multiplier));
|
||||
SHR(32, R(RSCRATCH), Imm8(31));
|
||||
SAR(64, R(Rd), Imm8(32 + m.shift));
|
||||
}
|
||||
else
|
||||
{
|
||||
IMUL(64, RSCRATCH, R(RSCRATCH), Imm32(m.multiplier));
|
||||
MOV(64, Rd, R(RSCRATCH));
|
||||
SHR(64, R(RSCRATCH), Imm8(63));
|
||||
SAR(64, R(Rd), Imm8(32 + m.shift));
|
||||
}
|
||||
|
||||
ADD(32, Rd, R(RSCRATCH));
|
||||
|
||||
if (inst.OE)
|
||||
GenerateConstantOverflow(false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
RCOpArg Ra = gpr.Use(a, RCMode::Read);
|
||||
@ -1364,7 +1567,6 @@ void Jit64::divwx(UGeckoInstruction inst)
|
||||
|
||||
SetJumpTarget(overflow);
|
||||
SAR(32, eax, Imm8(31));
|
||||
MOV(32, Rd, eax);
|
||||
if (inst.OE)
|
||||
{
|
||||
GenerateConstantOverflow(true);
|
||||
@ -1376,12 +1578,13 @@ void Jit64::divwx(UGeckoInstruction inst)
|
||||
|
||||
CDQ();
|
||||
IDIV(32, Rb);
|
||||
MOV(32, Rd, eax);
|
||||
if (inst.OE)
|
||||
{
|
||||
GenerateConstantOverflow(false);
|
||||
}
|
||||
|
||||
SetJumpTarget(done);
|
||||
MOV(32, Rd, eax);
|
||||
}
|
||||
if (inst.Rc)
|
||||
ComputeRC(d);
|
||||
|
57
Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp
Normal file
57
Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
// Copyright 2021 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "Core/PowerPC/JitCommon/DivUtils.h"
|
||||
|
||||
namespace JitCommon
|
||||
{
|
||||
// Computes the multiplier/shift pair used to replace a signed 32-bit division by the
// constant `d` with a multiplication followed by shifts.
//
// The header documents that `d` must not be -1, 0 or 1.
// NOTE(review): the magnitude of `d` is taken with std::abs, which is undefined for the most
// negative s32 value - presumably callers must exclude that value as well; confirm.
Magic SignedDivisionConstants(s32 d)
{
  constexpr u32 two_pow_31 = 0x80000000u;

  const bool is_negative = d < 0;
  const u32 abs_d = std::abs(d);
  const u32 limit = two_pow_31 - (is_negative ? 1u : 0u);
  const u32 abs_nc = limit - 1 - limit % abs_d;

  // Quotient/remainder pairs of 2^31 by abs_nc and by abs_d. Each round of the loop below
  // doubles them and carries any remainder overflow into the quotient.
  u32 quot_nc = two_pow_31 / abs_nc;
  u32 rem_nc = two_pow_31 % abs_nc;
  u32 quot_d = two_pow_31 / abs_d;
  u32 rem_d = two_pow_31 % abs_d;

  s32 exponent = 31;
  for (;;)
  {
    ++exponent;

    quot_nc <<= 1;
    rem_nc <<= 1;
    if (rem_nc >= abs_nc)
    {
      ++quot_nc;
      rem_nc -= abs_nc;
    }

    quot_d <<= 1;
    rem_d <<= 1;
    if (rem_d >= abs_d)
    {
      ++quot_d;
      rem_d -= abs_d;
    }

    // Stop once quot_nc exceeds delta, or equals it with a non-zero remainder.
    const u32 delta = abs_d - rem_d;
    if (quot_nc > delta || (quot_nc == delta && rem_nc != 0))
      break;
  }

  Magic result;
  result.multiplier = quot_d + 1;
  // A negative divisor gets the negated multiplier of its magnitude.
  if (is_negative)
    result.multiplier = -result.multiplier;
  result.shift = exponent - 32;

  return result;
}
|
||||
|
||||
} // namespace JitCommon
|
22
Source/Core/Core/PowerPC/JitCommon/DivUtils.h
Normal file
22
Source/Core/Core/PowerPC/JitCommon/DivUtils.h
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright 2021 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
namespace JitCommon
|
||||
{
|
||||
struct Magic
|
||||
{
|
||||
s32 multiplier;
|
||||
u8 shift;
|
||||
};
|
||||
|
||||
// Calculate the constants required to optimize a signed 32-bit integer division.
|
||||
// Taken from The PowerPC Compiler Writer's Guide and LLVM.
|
||||
// Divisor must not be -1, 0, or 1.
|
||||
Magic SignedDivisionConstants(s32 divisor);
|
||||
|
||||
} // namespace JitCommon
|
@ -27,6 +27,12 @@
|
||||
<Project>{41279555-f94f-4ebc-99de-af863c10c5c4}</Project>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Core\PowerPC\JitCommon\DivUtils.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Core\PowerPC\JitCommon\DivUtils.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets" />
|
||||
</Project>
|
33
Source/UnitTests/DivUtilsTest.cpp
Normal file
33
Source/UnitTests/DivUtilsTest.cpp
Normal file
@ -0,0 +1,33 @@
|
||||
// Copyright 2021 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "Core/PowerPC/JitCommon/DivUtils.h"
|
||||
|
||||
using namespace JitCommon;
|
||||
|
||||
// Checks SignedDivisionConstants() against known multiplier/shift pairs for the divisors
// 3, 5 and 7 and their negations.
TEST(DivUtils, Signed)
{
  const auto expect_magic = [](int divisor, int multiplier, int shift) {
    SCOPED_TRACE(divisor);
    const Magic magic = SignedDivisionConstants(divisor);
    EXPECT_EQ(multiplier, magic.multiplier);
    EXPECT_EQ(shift, magic.shift);
  };

  // Positive divisors
  expect_magic(3, 0x55555556, 0);
  expect_magic(5, 0x66666667, 1);
  expect_magic(7, -0x6DB6DB6D, 2);

  // Negative divisors: same shifts, negated multipliers
  expect_magic(-3, -0x55555556, 0);
  expect_magic(-5, -0x66666667, 1);
  expect_magic(-7, 0x6DB6DB6D, 2);
}
|
@ -69,6 +69,7 @@
|
||||
<ClCompile Include="Core\IOS\FS\FileSystemTest.cpp" />
|
||||
<ClCompile Include="Core\MMIOTest.cpp" />
|
||||
<ClCompile Include="Core\PageFaultTest.cpp" />
|
||||
<ClCompile Include="DivUtilsTest.cpp" />
|
||||
<ClCompile Include="FileUtil.cpp" />
|
||||
<ClCompile Include="VideoCommon\VertexLoaderTest.cpp" />
|
||||
<ClCompile Include="StubHost.cpp" />
|
||||
|
Loading…
Reference in New Issue
Block a user