From c3483a1823d5a5f4a8174682228f78d00cee3247 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 31 Mar 2018 17:09:33 -0400 Subject: [PATCH] CommonFuncs: Generify rotation functions and move them to BitUtils.h These are bit manipulation functions, so they belong within BitUtils. This also gets rid of duplicated code and avoids relying on compiler reserved names existing or not existing to determine whether or not we define a set of functions. Optimizers are smart enough in GCC and clang to transform the code to a ROR or ROL instruction in the respective functions. --- Source/Core/Common/BitUtils.h | 44 +++++++++++++++++++ Source/Core/Common/CommonFuncs.h | 32 -------------- Source/Core/Common/Hash.cpp | 14 +++--- Source/Core/Core/ARDecrypt.cpp | 30 ++++++------- .../Interpreter/Interpreter_Integer.cpp | 9 ++-- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 10 +++-- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 7 +-- 7 files changed, 81 insertions(+), 65 deletions(-) diff --git a/Source/Core/Common/BitUtils.h b/Source/Core/Common/BitUtils.h index 8d5398fb39..2d45902e41 100644 --- a/Source/Core/Common/BitUtils.h +++ b/Source/Core/Common/BitUtils.h @@ -100,6 +100,50 @@ constexpr Result ExtractBits(const T src) noexcept return ExtractBits(src, begin, end); } +/// +/// Rotates a value left (ROL). +/// +/// @param value The value to rotate. +/// @param amount The number of bits to rotate the value. +/// @tparam T An unsigned type. +/// +/// @return The rotated value. +/// +template <typename T> +constexpr T RotateLeft(const T value, size_t amount) noexcept +{ + static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types left."); + + amount %= BitSize<T>(); + + if (amount == 0) + return value; + + return static_cast<T>((value << amount) | (value >> (BitSize<T>() - amount))); +} + +/// +/// Rotates a value right (ROR). +/// +/// @param value The value to rotate. +/// @param amount The number of bits to rotate the value. +/// @tparam T An unsigned type. +/// +/// @return The rotated value. 
+/// +template <typename T> +constexpr T RotateRight(const T value, size_t amount) noexcept +{ + static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types right."); + + amount %= BitSize<T>(); + + if (amount == 0) + return value; + + return static_cast<T>((value >> amount) | (value << (BitSize<T>() - amount))); +} + /// /// Verifies whether the supplied value is a valid bit mask of the form 0b00...0011...11. /// Both edge cases of all zeros and all ones are considered valid masks, too. diff --git a/Source/Core/Common/CommonFuncs.h b/Source/Core/Common/CommonFuncs.h index 95a4b192e8..11828a72c1 100644 --- a/Source/Core/Common/CommonFuncs.h +++ b/Source/Core/Common/CommonFuncs.h @@ -30,38 +30,6 @@ constexpr size_t ArraySize(T (&arr)[N]) __builtin_trap(); \ } -// GCC 4.8 defines all the rotate functions now -// Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit -#ifndef _rotl -inline u32 _rotl(u32 x, int shift) -{ - shift &= 31; - if (!shift) - return x; - return (x << shift) | (x >> (32 - shift)); -} - -inline u32 _rotr(u32 x, int shift) -{ - shift &= 31; - if (!shift) - return x; - return (x >> shift) | (x << (32 - shift)); -} -#endif - -inline u64 _rotl64(u64 x, unsigned int shift) -{ - unsigned int n = shift % 64; - return (x << n) | (x >> (64 - n)); -} - -inline u64 _rotr64(u64 x, unsigned int shift) -{ - unsigned int n = shift % 64; - return (x >> n) | (x << (64 - n)); -} - #else // WIN32 // Function Cross-Compatibility #define strcasecmp _stricmp diff --git a/Source/Core/Common/Hash.cpp b/Source/Core/Common/Hash.cpp index 7074e12e55..2678ab89bc 100644 --- a/Source/Core/Common/Hash.cpp +++ b/Source/Core/Common/Hash.cpp @@ -3,8 +3,10 @@ // Refer to the license.txt file included. 
#include "Common/Hash.h" + #include #include +#include "Common/BitUtils.h" #include "Common/CPUDetect.h" #include "Common/CommonFuncs.h" #include "Common/Intrinsics.h" @@ -117,15 +119,15 @@ static u64 getblock(const u64* p, int i) static void bmix64(u64& h1, u64& h2, u64& k1, u64& k2, u64& c1, u64& c2) { k1 *= c1; - k1 = _rotl64(k1, 23); + k1 = Common::RotateLeft(k1, 23); k1 *= c2; h1 ^= k1; h1 += h2; - h2 = _rotl64(h2, 41); + h2 = Common::RotateLeft(h2, 41); k2 *= c2; - k2 = _rotl64(k2, 23); + k2 = Common::RotateLeft(k2, 23); k2 *= c1; h2 ^= k2; h2 += h1; @@ -396,15 +398,15 @@ static u32 fmix32(u32 h) static void bmix32(u32& h1, u32& h2, u32& k1, u32& k2, u32& c1, u32& c2) { k1 *= c1; - k1 = _rotl(k1, 11); + k1 = Common::RotateLeft(k1, 11); k1 *= c2; h1 ^= k1; h1 += h2; - h2 = _rotl(h2, 17); + h2 = Common::RotateLeft(h2, 17); k2 *= c2; - k2 = _rotl(k2, 11); + k2 = Common::RotateLeft(k2, 11); k2 *= c1; h2 ^= k2; h2 += h1; diff --git a/Source/Core/Core/ARDecrypt.cpp b/Source/Core/Core/ARDecrypt.cpp index 77bfc175d3..af4cad929d 100644 --- a/Source/Core/Core/ARDecrypt.cpp +++ b/Source/Core/Core/ARDecrypt.cpp @@ -17,7 +17,7 @@ #include #endif -#include "Common/CommonFuncs.h" +#include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Common/MsgHandler.h" #include "Common/Swap.h" @@ -251,26 +251,26 @@ static void unscramble1(u32* addr, u32* val) { u32 tmp; - *val = _rotl(*val, 4); + *val = Common::RotateLeft(*val, 4); tmp = ((*addr ^ *val) & 0xF0F0F0F0); *addr ^= tmp; - *val = _rotr((*val ^ tmp), 0x14); + *val = Common::RotateRight((*val ^ tmp), 0x14); tmp = ((*addr ^ *val) & 0xFFFF0000); *addr ^= tmp; - *val = _rotr((*val ^ tmp), 0x12); + *val = Common::RotateRight((*val ^ tmp), 0x12); tmp = ((*addr ^ *val) & 0x33333333); *addr ^= tmp; - *val = _rotr((*val ^ tmp), 6); + *val = Common::RotateRight((*val ^ tmp), 6); tmp = ((*addr ^ *val) & 0x00FF00FF); *addr ^= tmp; - *val = _rotl((*val ^ tmp), 9); + *val = Common::RotateLeft((*val ^ tmp), 9); tmp = ((*addr ^ 
*val) & 0xAAAAAAAA); - *addr = _rotl((*addr ^ tmp), 1); + *addr = Common::RotateLeft((*addr ^ tmp), 1); *val ^= tmp; } @@ -278,27 +278,27 @@ static void unscramble2(u32* addr, u32* val) { u32 tmp; - *val = _rotr(*val, 1); + *val = Common::RotateRight(*val, 1); tmp = ((*addr ^ *val) & 0xAAAAAAAA); *val ^= tmp; - *addr = _rotr((*addr ^ tmp), 9); + *addr = Common::RotateRight((*addr ^ tmp), 9); tmp = ((*addr ^ *val) & 0x00FF00FF); *val ^= tmp; - *addr = _rotl((*addr ^ tmp), 6); + *addr = Common::RotateLeft((*addr ^ tmp), 6); tmp = ((*addr ^ *val) & 0x33333333); *val ^= tmp; - *addr = _rotl((*addr ^ tmp), 0x12); + *addr = Common::RotateLeft((*addr ^ tmp), 0x12); tmp = ((*addr ^ *val) & 0xFFFF0000); *val ^= tmp; - *addr = _rotl((*addr ^ tmp), 0x14); + *addr = Common::RotateLeft((*addr ^ tmp), 0x14); tmp = ((*addr ^ *val) & 0xF0F0F0F0); *val ^= tmp; - *addr = _rotr((*addr ^ tmp), 4); + *addr = Common::RotateRight((*addr ^ tmp), 4); } static void decryptcode(const u32* seeds, u32* code) @@ -311,13 +311,13 @@ static void decryptcode(const u32* seeds, u32* code) unscramble1(&addr, &val); while (i < 32) { - tmp = (_rotr(val, 4) ^ seeds[i++]); + tmp = (Common::RotateRight(val, 4) ^ seeds[i++]); tmp2 = (val ^ seeds[i++]); addr ^= (table6[tmp & 0x3F] ^ table4[(tmp >> 8) & 0x3F] ^ table2[(tmp >> 16) & 0x3F] ^ table0[(tmp >> 24) & 0x3F] ^ table7[tmp2 & 0x3F] ^ table5[(tmp2 >> 8) & 0x3F] ^ table3[(tmp2 >> 16) & 0x3F] ^ table1[(tmp2 >> 24) & 0x3F]); - tmp = (_rotr(addr, 4) ^ seeds[i++]); + tmp = (Common::RotateRight(addr, 4) ^ seeds[i++]); tmp2 = (addr ^ seeds[i++]); val ^= (table6[tmp & 0x3F] ^ table4[(tmp >> 8) & 0x3F] ^ table2[(tmp >> 16) & 0x3F] ^ table0[(tmp >> 24) & 0x3F] ^ table7[tmp2 & 0x3F] ^ table5[(tmp2 >> 8) & 0x3F] ^ diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp index 71c27599e0..d40f110481 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp +++ 
b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp @@ -4,10 +4,9 @@ #include "Core/PowerPC/Interpreter/Interpreter.h" -#include "Common/CommonFuncs.h" +#include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" #include "Core/PowerPC/PowerPC.h" void Interpreter::Helper_UpdateCR0(u32 value) @@ -172,7 +171,7 @@ void Interpreter::xoris(UGeckoInstruction inst) void Interpreter::rlwimix(UGeckoInstruction inst) { u32 mask = Helper_Mask(inst.MB, inst.ME); - rGPR[inst.RA] = (rGPR[inst.RA] & ~mask) | (_rotl(rGPR[inst.RS], inst.SH) & mask); + rGPR[inst.RA] = (rGPR[inst.RA] & ~mask) | (Common::RotateLeft(rGPR[inst.RS], inst.SH) & mask); if (inst.Rc) Helper_UpdateCR0(rGPR[inst.RA]); @@ -181,7 +180,7 @@ void Interpreter::rlwimix(UGeckoInstruction inst) void Interpreter::rlwinmx(UGeckoInstruction inst) { u32 mask = Helper_Mask(inst.MB, inst.ME); - rGPR[inst.RA] = _rotl(rGPR[inst.RS], inst.SH) & mask; + rGPR[inst.RA] = Common::RotateLeft(rGPR[inst.RS], inst.SH) & mask; if (inst.Rc) Helper_UpdateCR0(rGPR[inst.RA]); @@ -190,7 +189,7 @@ void Interpreter::rlwinmx(UGeckoInstruction inst) void Interpreter::rlwnmx(UGeckoInstruction inst) { u32 mask = Helper_Mask(inst.MB, inst.ME); - rGPR[inst.RA] = _rotl(rGPR[inst.RS], rGPR[inst.RB] & 0x1F) & mask; + rGPR[inst.RA] = Common::RotateLeft(rGPR[inst.RS], rGPR[inst.RB] & 0x1F) & mask; if (inst.Rc) Helper_UpdateCR0(rGPR[inst.RA]); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 86dceb7d07..6a3a825622 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -7,6 +7,7 @@ #include #include "Common/Assert.h" +#include "Common/BitUtils.h" #include "Common/CPUDetect.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" @@ -1435,7 +1436,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { u32 result = gpr.R(s).Imm32(); if (inst.SH != 0) - 
result = _rotl(result, inst.SH); + result = Common::RotateLeft(result, inst.SH); result &= Helper_Mask(inst.MB, inst.ME); gpr.SetImmediate32(a, result); if (inst.Rc) @@ -1520,7 +1521,8 @@ void Jit64::rlwimix(UGeckoInstruction inst) if (gpr.R(a).IsImm() && gpr.R(s).IsImm()) { u32 mask = Helper_Mask(inst.MB, inst.ME); - gpr.SetImmediate32(a, (gpr.R(a).Imm32() & ~mask) | (_rotl(gpr.R(s).Imm32(), inst.SH) & mask)); + gpr.SetImmediate32(a, (gpr.R(a).Imm32() & ~mask) | + (Common::RotateLeft(gpr.R(s).Imm32(), inst.SH) & mask)); if (inst.Rc) ComputeRC(gpr.R(a)); } @@ -1546,7 +1548,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) { gpr.BindToRegister(a, true, true); AndWithMask(gpr.RX(a), ~mask); - OR(32, gpr.R(a), Imm32(_rotl(gpr.R(s).Imm32(), inst.SH) & mask)); + OR(32, gpr.R(a), Imm32(Common::RotateLeft(gpr.R(s).Imm32(), inst.SH) & mask)); } else if (inst.SH) { @@ -1620,7 +1622,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) u32 mask = Helper_Mask(inst.MB, inst.ME); if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) { - gpr.SetImmediate32(a, _rotl(gpr.R(s).Imm32(), gpr.R(b).Imm32() & 0x1F) & mask); + gpr.SetImmediate32(a, Common::RotateLeft(gpr.R(s).Imm32(), gpr.R(b).Imm32() & 0x1F) & mask); } else { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 82c9e11800..a620e4b9ad 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -4,6 +4,7 @@ #include "Common/Arm64Emitter.h" #include "Common/Assert.h" +#include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Core/Core.h" @@ -534,7 +535,7 @@ void JitArm64::rlwinmx(UGeckoInstruction inst) u32 mask = Helper_Mask(inst.MB, inst.ME); if (gpr.IsImm(inst.RS)) { - gpr.SetImmediate(a, _rotl(gpr.GetImm(s), inst.SH) & mask); + gpr.SetImmediate(a, Common::RotateLeft(gpr.GetImm(s), inst.SH) & mask); if (inst.Rc) ComputeRC0(gpr.GetImm(a)); return; @@ -583,7 +584,7 @@ void 
JitArm64::rlwnmx(UGeckoInstruction inst) if (gpr.IsImm(b) && gpr.IsImm(s)) { - gpr.SetImmediate(a, _rotl(gpr.GetImm(s), gpr.GetImm(b) & 0x1F) & mask); + gpr.SetImmediate(a, Common::RotateLeft(gpr.GetImm(s), gpr.GetImm(b) & 0x1F) & mask); if (inst.Rc) ComputeRC0(gpr.GetImm(a)); } @@ -1437,7 +1438,7 @@ void JitArm64::rlwimix(UGeckoInstruction inst) if (gpr.IsImm(a) && gpr.IsImm(s)) { - u32 res = (gpr.GetImm(a) & ~mask) | (_rotl(gpr.GetImm(s), inst.SH) & mask); + u32 res = (gpr.GetImm(a) & ~mask) | (Common::RotateLeft(gpr.GetImm(s), inst.SH) & mask); gpr.SetImmediate(a, res); if (inst.Rc) ComputeRC0(res);