mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-14 21:37:52 -07:00
Jit64: Skip HandleNaNs for operations that can't generate NaN
Operations that have two operands and can't generate a default NaN, i.e. addition and subtraction, already have the desired NaN handling on x86. We just need to make sure not to reverse the operands. This fixes ps_sum0/ps_sum1 outputting NaNs in cases where they shouldn't. (HandleNaNs assumes that a NaN in a ps0 input always results in a NaN in the ps0 output, and correspondingly for ps1.)
This commit is contained in:
parent
cbceae9176
commit
2f1a8ee1b9
@ -236,8 +236,7 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
|||||||
|
|
||||||
bool single = inst.OPCD == 4 || inst.OPCD == 59;
|
bool single = inst.OPCD == 4 || inst.OPCD == 59;
|
||||||
// If both the inputs are known to have identical top and bottom halves, we can skip the MOVDDUP
|
// If both the inputs are known to have identical top and bottom halves, we can skip the MOVDDUP
|
||||||
// at the end by
|
// at the end by using packed arithmetic instead.
|
||||||
// using packed arithmetic instead.
|
|
||||||
bool packed = inst.OPCD == 4 ||
|
bool packed = inst.OPCD == 4 ||
|
||||||
(inst.OPCD == 59 && js.op->fprIsDuplicated[a] && js.op->fprIsDuplicated[arg2]);
|
(inst.OPCD == 59 && js.op->fprIsDuplicated[a] && js.op->fprIsDuplicated[arg2]);
|
||||||
// Packed divides are slower than scalar divides on basically all x86, so this optimization isn't
|
// Packed divides are slower than scalar divides on basically all x86, so this optimization isn't
|
||||||
@ -249,10 +248,12 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
|||||||
void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&) = nullptr;
|
void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&) = nullptr;
|
||||||
void (XEmitter::*sseOp)(X64Reg, const OpArg&) = nullptr;
|
void (XEmitter::*sseOp)(X64Reg, const OpArg&) = nullptr;
|
||||||
bool reversible = false;
|
bool reversible = false;
|
||||||
bool roundRHS = false;
|
bool round_rhs = false;
|
||||||
|
bool preserve_inputs = false;
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
case 18:
|
case 18:
|
||||||
|
preserve_inputs = m_accurate_nans;
|
||||||
avxOp = packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD;
|
avxOp = packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD;
|
||||||
sseOp = packed ? &XEmitter::DIVPD : &XEmitter::DIVSD;
|
sseOp = packed ? &XEmitter::DIVPD : &XEmitter::DIVSD;
|
||||||
break;
|
break;
|
||||||
@ -261,13 +262,14 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
|||||||
sseOp = packed ? &XEmitter::SUBPD : &XEmitter::SUBSD;
|
sseOp = packed ? &XEmitter::SUBPD : &XEmitter::SUBSD;
|
||||||
break;
|
break;
|
||||||
case 21:
|
case 21:
|
||||||
reversible = true;
|
reversible = !m_accurate_nans;
|
||||||
avxOp = packed ? &XEmitter::VADDPD : &XEmitter::VADDSD;
|
avxOp = packed ? &XEmitter::VADDPD : &XEmitter::VADDSD;
|
||||||
sseOp = packed ? &XEmitter::ADDPD : &XEmitter::ADDSD;
|
sseOp = packed ? &XEmitter::ADDPD : &XEmitter::ADDSD;
|
||||||
break;
|
break;
|
||||||
case 25:
|
case 25:
|
||||||
reversible = true;
|
reversible = true;
|
||||||
roundRHS = single && !js.op->fprIsSingle[c];
|
round_rhs = single && !js.op->fprIsSingle[c];
|
||||||
|
preserve_inputs = m_accurate_nans;
|
||||||
avxOp = packed ? &XEmitter::VMULPD : &XEmitter::VMULSD;
|
avxOp = packed ? &XEmitter::VMULPD : &XEmitter::VMULSD;
|
||||||
sseOp = packed ? &XEmitter::MULPD : &XEmitter::MULSD;
|
sseOp = packed ? &XEmitter::MULPD : &XEmitter::MULSD;
|
||||||
break;
|
break;
|
||||||
@ -280,9 +282,8 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
|||||||
RCOpArg Rarg2 = fpr.Use(arg2, RCMode::Read);
|
RCOpArg Rarg2 = fpr.Use(arg2, RCMode::Read);
|
||||||
RegCache::Realize(Rd, Ra, Rarg2);
|
RegCache::Realize(Rd, Ra, Rarg2);
|
||||||
|
|
||||||
bool preserve_inputs = m_accurate_nans;
|
|
||||||
X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
|
X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
|
||||||
if (roundRHS)
|
if (round_rhs)
|
||||||
{
|
{
|
||||||
if (a == d && !preserve_inputs)
|
if (a == d && !preserve_inputs)
|
||||||
{
|
{
|
||||||
@ -300,10 +301,15 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
|||||||
avx_op(avxOp, sseOp, dest, Ra, Rarg2, packed, reversible);
|
avx_op(avxOp, sseOp, dest, Ra, Rarg2, packed, reversible);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst.SUBOP5 != 25)
|
switch (inst.SUBOP5)
|
||||||
|
{
|
||||||
|
case 18:
|
||||||
HandleNaNs(inst, dest, XMM0, Ra, Rarg2, std::nullopt);
|
HandleNaNs(inst, dest, XMM0, Ra, Rarg2, std::nullopt);
|
||||||
else
|
break;
|
||||||
|
case 25:
|
||||||
HandleNaNs(inst, dest, XMM0, Ra, std::nullopt, Rarg2);
|
HandleNaNs(inst, dest, XMM0, Ra, std::nullopt, Rarg2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (single)
|
if (single)
|
||||||
FinalizeSingleResult(Rd, R(dest), packed, true);
|
FinalizeSingleResult(Rd, R(dest), packed, true);
|
||||||
|
@ -79,7 +79,8 @@ void Jit64::ps_sum(UGeckoInstruction inst)
|
|||||||
default:
|
default:
|
||||||
PanicAlertFmt("ps_sum WTF!!!");
|
PanicAlertFmt("ps_sum WTF!!!");
|
||||||
}
|
}
|
||||||
HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1, Ra, Rb, Rc);
|
// We're intentionally not calling HandleNaNs here.
|
||||||
|
// For addition and subtraction specifically, x86's NaN behavior matches PPC's.
|
||||||
FinalizeSingleResult(Rd, R(tmp));
|
FinalizeSingleResult(Rd, R(tmp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user