diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index afc1c9a920..671bd9fa9e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1792,7 +1792,7 @@ void Jit64::divwx(UGeckoInstruction inst) else { // Optimize signed 32-bit integer division by a constant - Magic m = SignedDivisionConstants(divisor); + SignedMagic m = SignedDivisionConstants(divisor); MOVSX(64, 32, RSCRATCH, Ra); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 43c90c9827..95f75a3f2a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1675,7 +1675,7 @@ void JitArm64::divwx(UGeckoInstruction inst) else { // Optimize signed 32-bit integer division by a constant - Magic m = SignedDivisionConstants(divisor); + SignedMagic m = SignedDivisionConstants(divisor); ARM64Reg WA = gpr.GetReg(); ARM64Reg WB = gpr.GetReg(); diff --git a/Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp b/Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp index f1b6a1baf6..4648407e86 100644 --- a/Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp @@ -3,16 +3,18 @@ #include "Core/PowerPC/JitCommon/DivUtils.h" +#include <algorithm> +#include <bit> #include <cstdlib> namespace JitCommon { -Magic SignedDivisionConstants(s32 d) +SignedMagic SignedDivisionConstants(s32 divisor) { const u32 two31 = 2147483648; - const u32 ad = std::abs(d); - const u32 t = two31 - (d < 0); + const u32 ad = std::abs(divisor); + const u32 t = two31 - (divisor < 0); const u32 anc = t - 1 - t % ad; u32 q1 = two31 / anc; u32 r1 = two31 - q1 * anc; @@ -44,13 +46,43 @@ Magic SignedDivisionConstants(s32 d) delta = ad - r2; } while (q1 < delta || (q1 == delta && r1 == 0)); - Magic mag; + SignedMagic mag; mag.multiplier = q2 + 1; - if (d < 0)
mag.multiplier = -mag.multiplier; mag.shift = p - 32; return mag; } +UnsignedMagic UnsignedDivisionConstants(u32 divisor) +{ + u32 shift = 31 - std::countl_zero(divisor); + + u64 magic_dividend = 0x100000000ULL << shift; + u32 multiplier = magic_dividend / divisor; + u32 max_quotient = multiplier >> shift; + + // Test for failure in round-up method + u32 round_up = (u64(multiplier + 1) * (max_quotient * divisor - 1)) >> (shift + 32); + bool fast = round_up == max_quotient - 1; + + if (fast) + { + multiplier++; + + // Use smallest magic number and shift amount possible + u32 trailing_zeroes = std::min(shift, u32(std::countr_zero(multiplier))); + multiplier >>= trailing_zeroes; + shift -= trailing_zeroes; + } + + UnsignedMagic mag; + mag.multiplier = multiplier; + mag.shift = shift; + mag.fast = fast; + + return mag; +} + } // namespace JitCommon diff --git a/Source/Core/Core/PowerPC/JitCommon/DivUtils.h b/Source/Core/Core/PowerPC/JitCommon/DivUtils.h index 73d91426e1..2cc3f2e494 100644 --- a/Source/Core/Core/PowerPC/JitCommon/DivUtils.h +++ b/Source/Core/Core/PowerPC/JitCommon/DivUtils.h @@ -7,7 +7,7 @@ namespace JitCommon { -struct Magic +struct SignedMagic { s32 multiplier; u8 shift; @@ -16,6 +16,27 @@ struct Magic // Calculate the constants required to optimize a signed 32-bit integer division. // Taken from The PowerPC Compiler Writer's Guide and LLVM. // Divisor must not be -1, 0, 1 or INT_MIN. -Magic SignedDivisionConstants(s32 divisor); +SignedMagic SignedDivisionConstants(s32 divisor); + +struct UnsignedMagic +{ + u32 multiplier; + u8 shift; + bool fast; +}; + +/// Calculate the constants required to optimize an unsigned 32-bit integer +/// division. +/// Divisor must not be 0, 1, or a power of two. +/// +/// Original implementation by calc84maniac. +/// Results are the same as the approach laid out in Hacker's Delight, with an +/// improvement for so-called uncooperative divisors (e.g. 7), as discovered by +/// ridiculousfish. 
+/// +/// See also: +/// https://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html +/// https://rubenvannieuwpoort.nl/posts/division-by-constant-unsigned-integers +UnsignedMagic UnsignedDivisionConstants(u32 divisor); } // namespace JitCommon diff --git a/Source/UnitTests/Core/PowerPC/DivUtilsTest.cpp b/Source/UnitTests/Core/PowerPC/DivUtilsTest.cpp index 894d0f628a..b218745849 100644 --- a/Source/UnitTests/Core/PowerPC/DivUtilsTest.cpp +++ b/Source/UnitTests/Core/PowerPC/DivUtilsTest.cpp @@ -9,12 +9,12 @@ using namespace JitCommon; TEST(DivUtils, Signed) { - Magic m3 = SignedDivisionConstants(3); - Magic m5 = SignedDivisionConstants(5); - Magic m7 = SignedDivisionConstants(7); - Magic minus3 = SignedDivisionConstants(-3); - Magic minus5 = SignedDivisionConstants(-5); - Magic minus7 = SignedDivisionConstants(-7); + SignedMagic m3 = SignedDivisionConstants(3); + SignedMagic m5 = SignedDivisionConstants(5); + SignedMagic m7 = SignedDivisionConstants(7); + SignedMagic minus3 = SignedDivisionConstants(-3); + SignedMagic minus5 = SignedDivisionConstants(-5); + SignedMagic minus7 = SignedDivisionConstants(-7); EXPECT_EQ(0x55555556, m3.multiplier); EXPECT_EQ(0, m3.shift); @@ -30,3 +30,32 @@ TEST(DivUtils, Signed) EXPECT_EQ(0x6DB6DB6D, minus7.multiplier); EXPECT_EQ(2, minus7.shift); } + +TEST(DivUtils, Unsigned) +{ + UnsignedMagic m3 = UnsignedDivisionConstants(3); + UnsignedMagic m5 = UnsignedDivisionConstants(5); + UnsignedMagic m7 = UnsignedDivisionConstants(7); + UnsignedMagic m9 = UnsignedDivisionConstants(9); + UnsignedMagic m19 = UnsignedDivisionConstants(19); + + EXPECT_EQ(0xAAAAAAABU, m3.multiplier); + EXPECT_EQ(1, m3.shift); + EXPECT_TRUE(m3.fast); + + EXPECT_EQ(0xCCCCCCCDU, m5.multiplier); + EXPECT_EQ(2, m5.shift); + EXPECT_TRUE(m5.fast); + + EXPECT_EQ(0x92492492U, m7.multiplier); + EXPECT_EQ(2, m7.shift); + EXPECT_FALSE(m7.fast); + + EXPECT_EQ(0x38E38E39U, m9.multiplier); + EXPECT_EQ(1, m9.shift); + EXPECT_TRUE(m9.fast); + + 
EXPECT_EQ(0xD79435E5U, m19.multiplier); + EXPECT_EQ(4, m19.shift); + EXPECT_FALSE(m19.fast); +}