From 688644dd1895ebf7a6ad93f94a735ab1e4be8e7e Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Thu, 24 Apr 2014 18:18:22 +0200 Subject: [PATCH] Jit64: use integer instructions for boolean logic They are semantically equivalent and according to Agner Fog they can run on more execution ports than their floating point counterparts (at least on Intel CPUs). --- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 10 +++++----- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 2 +- .../Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 2 +- Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 18 +++++++++--------- .../JitILCommon/JitILBase_LoadStore.cpp | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index dfb3519da2..e733c3f56a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -156,12 +156,12 @@ void Jit64::fmaddXX(UGeckoInstruction inst) case 30: //nmsub MULSD(XMM0, fpr.R(c)); SUBSD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits2)); + PXOR(XMM0, M((void*)&psSignBits2)); break; case 31: //nmadd MULSD(XMM0, fpr.R(c)); ADDSD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits2)); + PXOR(XMM0, M((void*)&psSignBits2)); break; } fpr.BindToRegister(d, false); @@ -197,13 +197,13 @@ void Jit64::fsign(UGeckoInstruction inst) MOVSD(XMM0, fpr.R(b)); switch (inst.SUBOP10) { case 40: // fnegx - XORPD(XMM0, M((void*)&psSignBits2)); + PXOR(XMM0, M((void*)&psSignBits2)); break; case 264: // fabsx - ANDPD(XMM0, M((void*)&psAbsMask2)); + PAND(XMM0, M((void*)&psAbsMask2)); break; case 136: // fnabs - ORPD(XMM0, M((void*)&psSignBits2)); + POR(XMM0, M((void*)&psSignBits2)); break; default: PanicAlert("fsign bleh"); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 1473d21dd3..f408a5b7bd 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -253,7 +253,7 @@ void Jit64::dcbz(UGeckoInstruction inst) if (inst.RA) ADD(32, R(EAX), gpr.R(inst.RA)); AND(32, R(EAX), Imm32(~31)); - XORPD(XMM0, R(XMM0)); + PXOR(XMM0, R(XMM0)); #if _M_X86_64 MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 22d7112798..9194220285 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -58,7 +58,7 @@ void Jit64::psq_st(UGeckoInstruction inst) #endif if (inst.W) { // One value - XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. + PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. CVTSD2SS(XMM0, fpr.R(s)); CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized)); } else { diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index e3da5bc251..58d73a845e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -59,13 +59,13 @@ void Jit64::ps_sel(UGeckoInstruction inst) fpr.Lock(a, b, c, d); MOVAPD(XMM0, fpr.R(a)); - XORPD(XMM1, R(XMM1)); + PXOR(XMM1, R(XMM1)); // XMM0 = XMM0 < 0 ? all 1s : all 0s CMPPD(XMM0, R(XMM1), LT); MOVAPD(XMM1, R(XMM0)); - ANDPD(XMM0, fpr.R(b)); - ANDNPD(XMM1, fpr.R(c)); - ORPD(XMM0, R(XMM1)); + PAND(XMM0, fpr.R(b)); + PANDN(XMM1, fpr.R(c)); + POR(XMM0, R(XMM1)); fpr.BindToRegister(d, false); MOVAPD(fpr.RX(d), R(XMM0)); fpr.UnlockAll(); @@ -99,13 +99,13 @@ void Jit64::ps_sign(UGeckoInstruction inst) switch (inst.SUBOP10) { case 40: //neg - XORPD(fpr.RX(d), M((void*)&psSignBits)); + PXOR(fpr.RX(d), M((void*)&psSignBits)); break; case 136: //nabs - ORPD(fpr.RX(d), M((void*)&psSignBits)); + POR(fpr.RX(d), M((void*)&psSignBits)); break; case 264: //abs - ANDPD(fpr.RX(d), M((void*)&psAbsMask)); + PAND(fpr.RX(d), M((void*)&psAbsMask)); break; } @@ -391,12 +391,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst) case 30: //nmsub MULPD(XMM0, fpr.R(c)); SUBPD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits)); + PXOR(XMM0, M((void*)&psSignBits)); break; case 31: //nmadd MULPD(XMM0, fpr.R(c)); ADDPD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits)); + PXOR(XMM0, M((void*)&psSignBits)); break; default: _assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!"); diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp index 5607e85250..d41f613f72 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp @@ -146,7 +146,7 @@ void JitILBase::dcbz(UGeckoInstruction inst) if (inst.RA) ADD(32, R(EAX), gpr.R(inst.RA)); AND(32, R(EAX), Imm32(~31)); - XORPD(XMM0, R(XMM0)); + PXOR(XMM0, R(XMM0)); #if _M_X86_64 MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);