Jit_FloatingPoint: fp_arith

This commit is contained in:
MerryMage
2018-10-15 21:01:54 +01:00
parent 2337e089bf
commit a26c9c4b74

View File

@@ -172,53 +172,55 @@ void Jit64::fp_arith(UGeckoInstruction inst)
bool round_input = single && !js.op->fprIsSingle[inst.FC]; bool round_input = single && !js.op->fprIsSingle[inst.FC];
bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs; bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs;
const auto fp_tri_op = [&](int d, int a, int b, bool reversible, const auto fp_tri_op = [&](int op1, int op2, bool reversible,
void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool roundRHS = false) { void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool roundRHS = false) {
fpr.Lock(d, a, b); RCX64Reg Rd = fpr.Bind(d, !single ? RCMode::ReadWrite : RCMode::Write);
fpr.BindToRegister(d, d == a || d == b || !single); RCOpArg Rop1 = fpr.Use(op1, RCMode::Read);
X64Reg dest = preserve_inputs ? XMM1 : fpr.RX(d); RCOpArg Rop2 = fpr.Use(op2, RCMode::Read);
RegCache::Realize(Rd, Rop1, Rop2);
X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
if (roundRHS) if (roundRHS)
{ {
if (d == a && !preserve_inputs) if (d == op1 && !preserve_inputs)
{ {
Force25BitPrecision(XMM0, fpr.R(b), XMM1); Force25BitPrecision(XMM0, Rop2, XMM1);
(this->*sseOp)(fpr.RX(d), R(XMM0)); (this->*sseOp)(Rd, R(XMM0));
} }
else else
{ {
Force25BitPrecision(dest, fpr.R(b), XMM0); Force25BitPrecision(dest, Rop2, XMM0);
(this->*sseOp)(dest, fpr.R(a)); (this->*sseOp)(dest, Rop1);
} }
} }
else else
{ {
avx_op(avxOp, sseOp, dest, fpr.R(a), fpr.R(b), packed, reversible); avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible);
} }
HandleNaNs(inst, fpr.RX(d), dest); HandleNaNs(inst, Rd, dest);
if (single) if (single)
ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true); ForceSinglePrecision(Rd, Rd, packed, true);
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(Rd);
fpr.UnlockAll();
}; };
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 18: case 18:
fp_tri_op(d, a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD, fp_tri_op(a, b, false, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
packed ? &XEmitter::DIVPD : &XEmitter::DIVSD); packed ? &XEmitter::DIVPD : &XEmitter::DIVSD);
break; break;
case 20: case 20:
fp_tri_op(d, a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD, fp_tri_op(a, b, false, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
packed ? &XEmitter::SUBPD : &XEmitter::SUBSD); packed ? &XEmitter::SUBPD : &XEmitter::SUBSD);
break; break;
case 21: case 21:
fp_tri_op(d, a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD, fp_tri_op(a, b, true, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
packed ? &XEmitter::ADDPD : &XEmitter::ADDSD); packed ? &XEmitter::ADDPD : &XEmitter::ADDSD);
break; break;
case 25: case 25:
fp_tri_op(d, a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD, fp_tri_op(a, c, true, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
packed ? &XEmitter::MULPD : &XEmitter::MULSD, round_input); packed ? &XEmitter::MULPD : &XEmitter::MULSD, round_input);
break; break;
default: default: