Merge pull request #1083 from FioraAeterna/lzcnt

Add LZCNT support, use in cntlzw
This commit is contained in:
Ryan Houdek 2014-09-14 09:18:10 -05:00
commit 4e16abd742
5 changed files with 64 additions and 43 deletions

View File

@ -197,6 +197,7 @@ void CPUInfo::Detect()
// Check for more features.
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1) bLAHFSAHF64 = true;
if ((cpu_id[2] >> 5) & 1) bLZCNT = true;
if ((cpu_id[3] >> 29) & 1) bLongMode = true;
}

View File

@ -750,12 +750,14 @@ void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);}
void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);}
void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2)
void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
{
_assert_msg_(DYNA_REC, !src.IsImm(), "WriteBitSearchType - Imm argument");
src.operandReg = (u8)dest;
if (bits == 16)
Write8(0x66);
if (rep)
Write8(0xF3);
src.WriteRex(this, bits, bits);
Write8(0x0F);
Write8(byte2);
@ -772,6 +774,19 @@ void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
// BSF - bit scan forward (bottom bit to top bit).
// Forwards to WriteBitSearchType with second opcode byte 0xBC.
void XEmitter::BSF(int bits, X64Reg dest, OpArg src)
{
	WriteBitSearchType(bits, dest, src, 0xBC);
}
// BSR - bit scan reverse (top bit to bottom bit).
// Forwards to WriteBitSearchType with second opcode byte 0xBD.
void XEmitter::BSR(int bits, X64Reg dest, OpArg src)
{
	WriteBitSearchType(bits, dest, src, 0xBD);
}
void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
{
if (!cpu_info.bBMI1)
PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
WriteBitSearchType(bits, dest, src, 0xBC, true);
}
void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
{
if (!cpu_info.bLZCNT)
PanicAlert("Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
WriteBitSearchType(bits, dest, src, 0xBD, true);
}
void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
{
_assert_msg_(DYNA_REC, !src.IsImm(), "MOVSX - Imm argument");

View File

@ -266,7 +266,7 @@ private:
void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
void WriteMulDivType(int bits, OpArg src, int ext);
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2);
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
void WriteMXCSR(OpArg arg, int ext);
@ -454,6 +454,11 @@ public:
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
// Available only on AMD >= Phenom or Intel >= Haswell. Test with cpu_info.bLZCNT.
void LZCNT(int bits, X64Reg dest, OpArg src);
// Note: TZCNT is actually part of BMI1. Test with cpu_info.bBMI1.
void TZCNT(int bits, X64Reg dest, OpArg src);
// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
void STMXCSR(OpArg memloc);
void LDMXCSR(OpArg memloc);

View File

@ -1905,13 +1905,19 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
else
{
gpr.Lock(a, s);
gpr.KillImmediate(s, true, false);
gpr.BindToRegister(a, (a == s), true);
BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s));
gpr.BindToRegister(a, a == s, true);
if (cpu_info.bLZCNT)
{
LZCNT(32, gpr.RX(a), gpr.R(s));
}
else
{
BSR(32, gpr.RX(a), gpr.R(s));
FixupBranch gotone = J_CC(CC_NZ);
MOV(32, gpr.R(a), Imm32(63));
SetJumpTarget(gotone);
XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
}
gpr.UnlockAll();
}

View File

@ -318,41 +318,35 @@ TEST_F(x64EmitterTest, CMOVcc_Register)
}
}
TEST_F(x64EmitterTest, BSF)
{
emitter->BSF(64, R12, R(RAX));
emitter->BSF(32, R12, R(RAX));
emitter->BSF(16, R12, R(RAX));
emitter->BSF(64, R12, MatR(RAX));
emitter->BSF(32, R12, MatR(RAX));
emitter->BSF(16, R12, MatR(RAX));
ExpectDisassembly("bsf r12, rax "
"bsf r12d, eax "
"bsf r12w, ax "
"bsf r12, qword ptr ds:[rax] "
"bsf r12d, dword ptr ds:[rax] "
"bsf r12w, word ptr ds:[rax]");
// Defines a TEST_F case for the emitter method `Name`.
// For each operand width (16/32/64) and each register in the matching name
// table, three forms are emitted: reg <- RAX, RAX <- reg, and reg <- [RAX].
// The expected text for those three forms is then passed to ExpectDisassembly.
#define BITSEARCH_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			std::vector<NamedReg> regs; \
			std::string size; \
			std::string rax_name; \
		} widths[] = { \
			{ 16, reg16names, "word", "ax" }, \
			{ 32, reg32names, "dword", "eax" }, \
			{ 64, reg64names, "qword", "rax" }, \
		}; \
		const std::string mnemonic = #Name " "; \
		for (const auto& width : widths) \
		{ \
			for (const auto& named : width.regs) \
			{ \
				emitter->Name(width.bits, named.reg, R(RAX)); \
				emitter->Name(width.bits, RAX, R(named.reg)); \
				emitter->Name(width.bits, named.reg, MatR(RAX)); \
				const std::string expected = \
					mnemonic + named.name + ", " + width.rax_name + " " + \
					mnemonic + width.rax_name + ", " + named.name + " " + \
					mnemonic + named.name + ", " + width.size + " ptr ds:[rax] "; \
				ExpectDisassembly(expected); \
			} \
		} \
	}
TEST_F(x64EmitterTest, BSR)
{
emitter->BSR(64, R12, R(RAX));
emitter->BSR(32, R12, R(RAX));
emitter->BSR(16, R12, R(RAX));
emitter->BSR(64, R12, MatR(RAX));
emitter->BSR(32, R12, MatR(RAX));
emitter->BSR(16, R12, MatR(RAX));
ExpectDisassembly("bsr r12, rax "
"bsr r12d, eax "
"bsr r12w, ax "
"bsr r12, qword ptr ds:[rax] "
"bsr r12d, dword ptr ds:[rax] "
"bsr r12w, word ptr ds:[rax]");
}
// Instantiate the bit-search emitter test once per instruction.
BITSEARCH_TEST(BSR);
BITSEARCH_TEST(BSF);
BITSEARCH_TEST(LZCNT);
BITSEARCH_TEST(TZCNT);
TEST_F(x64EmitterTest, PREFETCH)
{