DivUtils: Add unsigned division magic function

Takes the logic from Jit64 and moves it into DivUtils, so it can be
reused by other backends as well.
This commit is contained in:
Bram Speeckaert 2024-03-23 13:02:41 +01:00
parent 3948ac9513
commit 825a10616c
5 changed files with 97 additions and 15 deletions

View File

@ -1792,7 +1792,7 @@ void Jit64::divwx(UGeckoInstruction inst)
else
{
// Optimize signed 32-bit integer division by a constant
Magic m = SignedDivisionConstants(divisor);
SignedMagic m = SignedDivisionConstants(divisor);
MOVSX(64, 32, RSCRATCH, Ra);

View File

@ -1675,7 +1675,7 @@ void JitArm64::divwx(UGeckoInstruction inst)
else
{
// Optimize signed 32-bit integer division by a constant
Magic m = SignedDivisionConstants(divisor);
SignedMagic m = SignedDivisionConstants(divisor);
ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = gpr.GetReg();

View File

@ -3,16 +3,18 @@
#include "Core/PowerPC/JitCommon/DivUtils.h"
#include <algorithm>
#include <bit>
#include <cstdlib>
namespace JitCommon
{
Magic SignedDivisionConstants(s32 d)
SignedMagic SignedDivisionConstants(s32 divisor)
{
const u32 two31 = 2147483648;
const u32 ad = std::abs(d);
const u32 t = two31 - (d < 0);
const u32 ad = std::abs(divisor);
const u32 t = two31 - (divisor < 0);
const u32 anc = t - 1 - t % ad;
u32 q1 = two31 / anc;
u32 r1 = two31 - q1 * anc;
@ -44,13 +46,43 @@ Magic SignedDivisionConstants(s32 d)
delta = ad - r2;
} while (q1 < delta || (q1 == delta && r1 == 0));
Magic mag;
SignedMagic mag;
mag.multiplier = q2 + 1;
if (d < 0)
if (divisor < 0)
mag.multiplier = -mag.multiplier;
mag.shift = p - 32;
return mag;
}
UnsignedMagic UnsignedDivisionConstants(u32 divisor)
{
  // Bit index of the divisor's most significant set bit.
  u32 shift = 31 - std::countl_zero(divisor);

  // Round-down candidate: 2^(32 + shift) / divisor, stored in 32 bits.
  const u64 scaled_dividend = 0x100000000ULL << shift;
  u32 multiplier = scaled_dividend / divisor;
  const u32 largest_quotient = multiplier >> shift;

  // Try the round-up multiplier (candidate + 1) on the dividend that sits just
  // below largest_quotient * divisor. The round-up method is only usable when
  // that product still shifts down to largest_quotient - 1.
  const u32 probe_dividend = largest_quotient * divisor - 1;
  const u64 probe_product = u64(multiplier + 1) * probe_dividend;
  const u32 probe_quotient = probe_product >> (shift + 32);
  const bool fast = probe_quotient == largest_quotient - 1;

  if (fast)
  {
    ++multiplier;

    // Shrink the multiplier and the shift together by dropping the trailing
    // zero bits they share, never taking the shift below zero.
    const u32 droppable_bits = std::min(shift, u32(std::countr_zero(multiplier)));
    multiplier >>= droppable_bits;
    shift -= droppable_bits;
  }

  UnsignedMagic result;
  result.multiplier = multiplier;
  result.shift = shift;
  result.fast = fast;
  return result;
}
} // namespace JitCommon

View File

@ -7,7 +7,7 @@
namespace JitCommon
{
struct Magic
struct SignedMagic
{
s32 multiplier;
u8 shift;
@ -16,6 +16,27 @@ struct Magic
// Calculate the constants required to optimize a signed 32-bit integer division.
// Taken from The PowerPC Compiler Writer's Guide and LLVM.
// Divisor must not be -1, 0, 1 or INT_MIN.
Magic SignedDivisionConstants(s32 divisor);
SignedMagic SignedDivisionConstants(s32 divisor);
// Constants produced by UnsignedDivisionConstants for dividing an unsigned
// 32-bit value by a fixed divisor.
struct UnsignedMagic
{
  // 32-bit magic number the dividend is multiplied by.
  u32 multiplier;
  // Shift amount that accompanies the multiplier. UnsignedDivisionConstants
  // validates its round-up candidate with a total right shift of (32 + shift)
  // on the 64-bit product, before it reduces the multiplier and shift.
  u8 shift;
  // True when UnsignedDivisionConstants was able to use the round-up
  // multiplier; false when it fell back to the round-down multiplier.
  // NOTE(review): the non-fast case presumably needs a different instruction
  // sequence at the call site - confirm against the JIT backends.
  bool fast;
};
/// Calculate the constants required to optimize an unsigned 32-bit integer
/// division.
/// Divisor must not be 0, 1, or a power of two.
///
/// The `fast` member of the result reports whether the round-up multiplier
/// could be used. When it is false, the round-down multiplier is returned
/// instead; callers presumably have to use the adjusted sequence described in
/// the links below (NOTE(review): confirm against the JIT call sites).
///
/// Original implementation by calc84maniac.
/// Results are the same as the approach laid out in Hacker's Delight, with an
/// improvement for so-called uncooperative divisors (e.g. 7), as discovered by
/// ridiculousfish.
///
/// See also:
/// https://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
/// https://rubenvannieuwpoort.nl/posts/division-by-constant-unsigned-integers
UnsignedMagic UnsignedDivisionConstants(u32 divisor);
} // namespace JitCommon

View File

@ -9,12 +9,12 @@ using namespace JitCommon;
TEST(DivUtils, Signed)
{
Magic m3 = SignedDivisionConstants(3);
Magic m5 = SignedDivisionConstants(5);
Magic m7 = SignedDivisionConstants(7);
Magic minus3 = SignedDivisionConstants(-3);
Magic minus5 = SignedDivisionConstants(-5);
Magic minus7 = SignedDivisionConstants(-7);
SignedMagic m3 = SignedDivisionConstants(3);
SignedMagic m5 = SignedDivisionConstants(5);
SignedMagic m7 = SignedDivisionConstants(7);
SignedMagic minus3 = SignedDivisionConstants(-3);
SignedMagic minus5 = SignedDivisionConstants(-5);
SignedMagic minus7 = SignedDivisionConstants(-7);
EXPECT_EQ(0x55555556, m3.multiplier);
EXPECT_EQ(0, m3.shift);
@ -30,3 +30,32 @@ TEST(DivUtils, Signed)
EXPECT_EQ(0x6DB6DB6D, minus7.multiplier);
EXPECT_EQ(2, minus7.shift);
}
// Pins the multiplier, shift and fast flag that UnsignedDivisionConstants
// reports for a handful of small divisors. 7 and 19 are the cases expected to
// come back with fast == false.
TEST(DivUtils, Unsigned)
{
  const UnsignedMagic by3 = UnsignedDivisionConstants(3);
  EXPECT_EQ(0xAAAAAAABU, by3.multiplier);
  EXPECT_EQ(1, by3.shift);
  EXPECT_TRUE(by3.fast);

  const UnsignedMagic by5 = UnsignedDivisionConstants(5);
  EXPECT_EQ(0xCCCCCCCDU, by5.multiplier);
  EXPECT_EQ(2, by5.shift);
  EXPECT_TRUE(by5.fast);

  const UnsignedMagic by7 = UnsignedDivisionConstants(7);
  EXPECT_EQ(0x92492492U, by7.multiplier);
  EXPECT_EQ(2, by7.shift);
  EXPECT_FALSE(by7.fast);

  const UnsignedMagic by9 = UnsignedDivisionConstants(9);
  EXPECT_EQ(0x38E38E39U, by9.multiplier);
  EXPECT_EQ(1, by9.shift);
  EXPECT_TRUE(by9.fast);

  const UnsignedMagic by19 = UnsignedDivisionConstants(19);
  EXPECT_EQ(0xD79435E5U, by19.multiplier);
  EXPECT_EQ(4, by19.shift);
  EXPECT_FALSE(by19.fast);
}