From c130a496f28010d6bc59bb9c45013f69fbdd3dc3 Mon Sep 17 00:00:00 2001 From: Fiora Date: Fri, 19 Sep 2014 01:40:02 -0700 Subject: [PATCH] JIT: fix fsel/ps_sel implementations for NaN input fselx was the main problem, but ps_sel was wrong too (even if there were no known reported bugs with it). This fixes Beyond Good and Evil (at the least). --- Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 12 +++++++----- Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 12 ++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index a20dd17e89..f79818fa77 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -212,10 +212,12 @@ void Jit64::fselx(UGeckoInstruction inst) int c = inst.FC; fpr.Lock(a, b, c, d); - MOVSD(XMM0, fpr.R(a)); - PXOR(XMM1, R(XMM1)); - // XMM0 = XMM0 < 0 ? all 1s : all 0s - CMPSD(XMM0, R(XMM1), LT); + MOVSD(XMM1, fpr.R(a)); + PXOR(XMM0, R(XMM0)); + // This condition is very tricky; there's only one right way to handle both the case of + // negative/positive zero and NaN properly. + // (a >= -0.0 ? c : b) transforms into (!(0 <= a) ? b : c), hence the NLE (which, unlike a plain 0 > a, is also true when a is NaN).
+ CMPSD(XMM0, R(XMM1), NLE); if (cpu_info.bSSE4_1) { MOVSD(XMM1, fpr.R(c)); @@ -228,7 +230,7 @@ void Jit64::fselx(UGeckoInstruction inst) PANDN(XMM1, fpr.R(c)); POR(XMM1, R(XMM0)); } - fpr.BindToRegister(d, false); + fpr.BindToRegister(d, true); MOVSD(fpr.RX(d), R(XMM1)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index f08fb6b863..83fe2ca4cf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -43,17 +43,17 @@ void Jit64::ps_sel(UGeckoInstruction inst) if (cpu_info.bSSE4_1) { - MOVAPD(XMM0, fpr.R(a)); - PXOR(XMM1, R(XMM1)); - CMPPD(XMM0, R(XMM1), LT); // XMM0 = XMM0 < 0 ? all 1s : all 0s + MOVAPD(XMM1, fpr.R(a)); + PXOR(XMM0, R(XMM0)); + CMPPD(XMM0, R(XMM1), NLE); MOVAPD(XMM1, fpr.R(c)); BLENDVPD(XMM1, fpr.R(b)); } else { - MOVAPD(XMM1, fpr.R(a)); - PXOR(XMM0, R(XMM0)); - CMPPD(XMM1, R(XMM0), LT); // XMM0 = XMM0 < 0 ? all 1s : all 0s + MOVAPD(XMM0, fpr.R(a)); + PXOR(XMM1, R(XMM1)); + CMPPD(XMM1, R(XMM0), NLE); MOVAPD(XMM0, R(XMM1)); PAND(XMM1, fpr.R(b)); PANDN(XMM0, fpr.R(c));