Jit64: Skip HandleNaNs for operations that can't generate NaN

Operations that have two operands and can't generate a default NaN,
i.e. addition and subtraction, already have the desired NaN handling
on x86. We just need to make sure not to reverse the operands, since
which input NaN gets propagated depends on the operand order.

This fixes ps_sum0/ps_sum1 outputting NaNs in cases where they shouldn't.
(HandleNaNs assumes that a NaN in a ps0 input always results in a NaN in
the ps0 output, and correspondingly for ps1.)
This commit is contained in:
JosJuice 2022-11-26 14:32:42 +01:00
parent cbceae9176
commit 2f1a8ee1b9
2 changed files with 17 additions and 10 deletions

View File

@ -236,8 +236,7 @@ void Jit64::fp_arith(UGeckoInstruction inst)
bool single = inst.OPCD == 4 || inst.OPCD == 59; bool single = inst.OPCD == 4 || inst.OPCD == 59;
// If both the inputs are known to have identical top and bottom halves, we can skip the MOVDDUP // If both the inputs are known to have identical top and bottom halves, we can skip the MOVDDUP
// at the end by // at the end by using packed arithmetic instead.
// using packed arithmetic instead.
bool packed = inst.OPCD == 4 || bool packed = inst.OPCD == 4 ||
(inst.OPCD == 59 && js.op->fprIsDuplicated[a] && js.op->fprIsDuplicated[arg2]); (inst.OPCD == 59 && js.op->fprIsDuplicated[a] && js.op->fprIsDuplicated[arg2]);
// Packed divides are slower than scalar divides on basically all x86, so this optimization isn't // Packed divides are slower than scalar divides on basically all x86, so this optimization isn't
@ -249,10 +248,12 @@ void Jit64::fp_arith(UGeckoInstruction inst)
void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&) = nullptr; void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&) = nullptr;
void (XEmitter::*sseOp)(X64Reg, const OpArg&) = nullptr; void (XEmitter::*sseOp)(X64Reg, const OpArg&) = nullptr;
bool reversible = false; bool reversible = false;
bool roundRHS = false; bool round_rhs = false;
bool preserve_inputs = false;
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 18: case 18:
preserve_inputs = m_accurate_nans;
avxOp = packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD; avxOp = packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD;
sseOp = packed ? &XEmitter::DIVPD : &XEmitter::DIVSD; sseOp = packed ? &XEmitter::DIVPD : &XEmitter::DIVSD;
break; break;
@ -261,13 +262,14 @@ void Jit64::fp_arith(UGeckoInstruction inst)
sseOp = packed ? &XEmitter::SUBPD : &XEmitter::SUBSD; sseOp = packed ? &XEmitter::SUBPD : &XEmitter::SUBSD;
break; break;
case 21: case 21:
reversible = true; reversible = !m_accurate_nans;
avxOp = packed ? &XEmitter::VADDPD : &XEmitter::VADDSD; avxOp = packed ? &XEmitter::VADDPD : &XEmitter::VADDSD;
sseOp = packed ? &XEmitter::ADDPD : &XEmitter::ADDSD; sseOp = packed ? &XEmitter::ADDPD : &XEmitter::ADDSD;
break; break;
case 25: case 25:
reversible = true; reversible = true;
roundRHS = single && !js.op->fprIsSingle[c]; round_rhs = single && !js.op->fprIsSingle[c];
preserve_inputs = m_accurate_nans;
avxOp = packed ? &XEmitter::VMULPD : &XEmitter::VMULSD; avxOp = packed ? &XEmitter::VMULPD : &XEmitter::VMULSD;
sseOp = packed ? &XEmitter::MULPD : &XEmitter::MULSD; sseOp = packed ? &XEmitter::MULPD : &XEmitter::MULSD;
break; break;
@ -280,9 +282,8 @@ void Jit64::fp_arith(UGeckoInstruction inst)
RCOpArg Rarg2 = fpr.Use(arg2, RCMode::Read); RCOpArg Rarg2 = fpr.Use(arg2, RCMode::Read);
RegCache::Realize(Rd, Ra, Rarg2); RegCache::Realize(Rd, Ra, Rarg2);
bool preserve_inputs = m_accurate_nans;
X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd); X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
if (roundRHS) if (round_rhs)
{ {
if (a == d && !preserve_inputs) if (a == d && !preserve_inputs)
{ {
@ -300,10 +301,15 @@ void Jit64::fp_arith(UGeckoInstruction inst)
avx_op(avxOp, sseOp, dest, Ra, Rarg2, packed, reversible); avx_op(avxOp, sseOp, dest, Ra, Rarg2, packed, reversible);
} }
if (inst.SUBOP5 != 25) switch (inst.SUBOP5)
{
case 18:
HandleNaNs(inst, dest, XMM0, Ra, Rarg2, std::nullopt); HandleNaNs(inst, dest, XMM0, Ra, Rarg2, std::nullopt);
else break;
case 25:
HandleNaNs(inst, dest, XMM0, Ra, std::nullopt, Rarg2); HandleNaNs(inst, dest, XMM0, Ra, std::nullopt, Rarg2);
break;
}
if (single) if (single)
FinalizeSingleResult(Rd, R(dest), packed, true); FinalizeSingleResult(Rd, R(dest), packed, true);

View File

@ -79,7 +79,8 @@ void Jit64::ps_sum(UGeckoInstruction inst)
default: default:
PanicAlertFmt("ps_sum WTF!!!"); PanicAlertFmt("ps_sum WTF!!!");
} }
HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1, Ra, Rb, Rc); // We're intentionally not calling HandleNaNs here.
// For addition and subtraction specifically, x86's NaN behavior matches PPC's.
FinalizeSingleResult(Rd, R(tmp)); FinalizeSingleResult(Rd, R(tmp));
} }