From c361f9155b06f605cf063d8fd2b1a1c7dfba6c15 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sat, 8 Oct 2022 17:45:17 +0200
Subject: [PATCH] Jit64: Turn SNaN into QNaN in HandleNaNs

Improves accuracy but isn't known to affect any games.

This turned out to be fairly convenient to implement; ORing with the
PPC default NaN will quieten SNaNs and do nothing to QNaNs.
---
 .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 38 +++++++++++++++----
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index 17c52c42cf..a91c3ec47e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -116,13 +116,17 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
     if (std::find(inputs.begin(), inputs.end(), i) == inputs.end())
       inputs.push_back(i);
   }
+
   if (inst.OPCD != 4)
   {
     // not paired-single
+
     UCOMISD(xmm, R(xmm));
     FixupBranch handle_nan = J_CC(CC_P, true);
     SwitchToFarCode();
     SetJumpTarget(handle_nan);
+
+    // If any inputs are NaNs, pick the first NaN of them
     std::vector<FixupBranch> fixups;
     for (u32 x : inputs)
     {
@@ -132,9 +136,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
       UCOMISD(xmm, R(xmm));
       fixups.push_back(J_CC(CC_P));
     }
-    MOVDDUP(xmm, MConst(psGeneratedQNaN));
+
+    // Otherwise, pick the PPC default NaN (will be finished below)
+    XORPD(xmm, R(xmm));
+
+    // Turn SNaNs into QNaNs (or finish writing the PPC default NaN)
     for (FixupBranch fixup : fixups)
       SetJumpTarget(fixup);
+    ORPD(xmm, MConst(psGeneratedQNaN));
+
     FixupBranch done = J(true);
     SwitchToNearCode();
     SetJumpTarget(done);
@@ -142,7 +152,9 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
   else
   {
     // paired-single
+    std::reverse(inputs.begin(), inputs.end());
+
     if (cpu_info.bSSE4_1)
     {
       avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
@@ -150,8 +162,12 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
       FixupBranch handle_nan = J_CC(CC_NZ, true);
       SwitchToFarCode();
       SetJumpTarget(handle_nan);
+
+      // Replace NaNs with PPC default NaN
       ASSERT_MSG(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
       BLENDVPD(xmm, MConst(psGeneratedQNaN));
+
+      // If any inputs are NaNs, use those instead
       for (u32 x : inputs)
       {
         RCOpArg Rx = fpr.Use(x, RCMode::Read);
@@ -159,13 +175,11 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
         avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, Rx, Rx, CMP_UNORD);
         BLENDVPD(xmm, Rx);
       }
-      FixupBranch done = J(true);
-      SwitchToNearCode();
-      SetJumpTarget(done);
     }
     else
     {
       // SSE2 fallback
+
       RCX64Reg tmp = fpr.Scratch();
       RegCache::Realize(tmp);
       MOVAPD(clobber, R(xmm));
@@ -175,11 +189,15 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
       FixupBranch handle_nan = J_CC(CC_NZ, true);
       SwitchToFarCode();
       SetJumpTarget(handle_nan);
+
+      // Replace NaNs with PPC default NaN
       MOVAPD(tmp, R(clobber));
       ANDNPD(clobber, R(xmm));
       ANDPD(tmp, MConst(psGeneratedQNaN));
       ORPD(tmp, R(clobber));
       MOVAPD(xmm, tmp);
+
+      // If any inputs are NaNs, use those instead
       for (u32 x : inputs)
       {
         RCOpArg Rx = fpr.Use(x, RCMode::Read);
@@ -191,10 +209,16 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
         ANDPD(xmm, tmp);
         ORPD(xmm, R(clobber));
       }
-      FixupBranch done = J(true);
-      SwitchToNearCode();
-      SetJumpTarget(done);
     }
+
+    // Turn SNaNs into QNaNs
+    avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
+    ANDPD(clobber, MConst(psGeneratedQNaN));
+    ORPD(xmm, R(clobber));
+
+    FixupBranch done = J(true);
+    SwitchToNearCode();
+    SetJumpTarget(done);
   }
 }
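
Note (not part of the patch): a minimal standalone sketch of the bit-level reasoning in the commit
message. It assumes psGeneratedQNaN holds the PPC default NaN pattern 0x7FF8000000000000 (the usual
default quiet NaN for doubles); the constant names below are made up for illustration. Because a QNaN
already has all of those bits set, ORing with the default NaN can only set the quiet bit: an SNaN
becomes a QNaN, a QNaN is left unchanged, and ORing into a zeroed register (the XORPD path above)
produces the default NaN itself.

// Standalone illustration, not Dolphin code. Constant names are hypothetical;
// kPpcDefaultNaN is assumed to match psGeneratedQNaN (0x7FF8000000000000).
#include <cstdint>
#include <cstdio>

int main()
{
  constexpr uint64_t kPpcDefaultNaN = 0x7FF8000000000000ULL;  // assumed default QNaN pattern
  constexpr uint64_t kSNaN = 0x7FF0000000000001ULL;           // a signaling NaN
  constexpr uint64_t kQNaN = 0x7FF8000000000123ULL;           // an already-quiet NaN

  // ORing with the default NaN only sets bits that every QNaN already has set.
  std::printf("SNaN | default = %016llx\n",  // -> 7ff8000000000001 (now quiet)
              static_cast<unsigned long long>(kSNaN | kPpcDefaultNaN));
  std::printf("QNaN | default = %016llx\n",  // -> 7ff8000000000123 (unchanged)
              static_cast<unsigned long long>(kQNaN | kPpcDefaultNaN));
  std::printf("0    | default = %016llx\n",  // -> 7ff8000000000000 (XORPD + ORPD path)
              static_cast<unsigned long long>(0ULL | kPpcDefaultNaN));
  return 0;
}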