Jit64: merge tri_op into fp_tri_op

This commit is contained in:
Tillmann Karras 2015-05-21 12:33:37 +02:00
parent dc220fa13d
commit 6d23b511a6
3 changed files with 10 additions and 40 deletions

View File

@ -137,13 +137,11 @@ public:
void MultiplyImmediate(u32 imm, int a, int d, bool overflow); void MultiplyImmediate(u32 imm, int a, int d, bool overflow);
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg),
void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
typedef u32 (*Operation)(u32 a, u32 b); typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
bool Rc = false, bool carry = false); bool Rc = false, bool carry = false);
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg), void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg),
void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool packed = false, bool roundRHS = false); void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg), bool packed = false, bool roundRHS = false);
void FloatCompare(UGeckoInstruction inst, bool upper = false); void FloatCompare(UGeckoInstruction inst, bool upper = false);
// OPCODES // OPCODES

View File

@ -16,7 +16,7 @@ static const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0xFFFFFFF
static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000}; static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*avxOp)(X64Reg, X64Reg, OpArg), void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*avxOp)(X64Reg, X64Reg, OpArg),
void (XEmitter::*sseOp)(X64Reg, OpArg), UGeckoInstruction inst, bool packed, bool roundRHS) void (XEmitter::*sseOp)(X64Reg, OpArg), bool packed, bool roundRHS)
{ {
fpr.Lock(d, a, b); fpr.Lock(d, a, b);
fpr.BindToRegister(d, d == a || d == b || !single); fpr.BindToRegister(d, d == a || d == b || !single);
@ -80,13 +80,13 @@ void Jit64::fp_arith(UGeckoInstruction inst)
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 18: fp_tri_op(d, a, b, false, single, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD, case 18: fp_tri_op(d, a, b, false, single, packed ? &XEmitter::VDIVPD : &XEmitter::VDIVSD,
packed ? &XEmitter::DIVPD : &XEmitter::DIVSD, inst, packed); break; packed ? &XEmitter::DIVPD : &XEmitter::DIVSD, packed); break;
case 20: fp_tri_op(d, a, b, false, single, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD, case 20: fp_tri_op(d, a, b, false, single, packed ? &XEmitter::VSUBPD : &XEmitter::VSUBSD,
packed ? &XEmitter::SUBPD : &XEmitter::SUBSD, inst, packed); break; packed ? &XEmitter::SUBPD : &XEmitter::SUBSD, packed); break;
case 21: fp_tri_op(d, a, b, true, single, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD, case 21: fp_tri_op(d, a, b, true, single, packed ? &XEmitter::VADDPD : &XEmitter::VADDSD,
packed ? &XEmitter::ADDPD : &XEmitter::ADDSD, inst, packed); break; packed ? &XEmitter::ADDPD : &XEmitter::ADDSD, packed); break;
case 25: fp_tri_op(d, a, c, true, single, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD, case 25: fp_tri_op(d, a, c, true, single, packed ? &XEmitter::VMULPD : &XEmitter::VMULSD,
packed ? &XEmitter::MULPD : &XEmitter::MULSD, inst, packed, round_input); break; packed ? &XEmitter::MULPD : &XEmitter::MULSD, packed, round_input); break;
default: default:
_assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
} }

View File

@ -56,34 +56,6 @@ void Jit64::ps_sign(UGeckoInstruction inst)
fpr.UnlockAll(); fpr.UnlockAll();
} }
// Emits code for a three-operand floating-point operation on guest FPRs:
// the result of combining registers `a` and `b` is written to register `d`,
// then passed through ForceSinglePrecision and SetFPRFIfNeeded.
//
//   d, a, b     - guest FPR indices (destination and the two sources).
//   reversible  - forwarded to avx_op on the non-rounding path.
//   avxOp       - emitter taking (dest, src1, src2); used only via avx_op.
//   sseOp       - emitter taking (dest, src); used directly on the rounding
//                 path and forwarded to avx_op otherwise.
//   inst        - not read anywhere in this body.
//   roundRHS    - when set, `b` is first run through Force25BitPrecision.
//
// There's still a little bit more optimization that can be squeezed out of this.
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*avxOp)(X64Reg, X64Reg, OpArg), void (XEmitter::*sseOp)(X64Reg, OpArg), UGeckoInstruction inst, bool roundRHS)
{
	fpr.Lock(d, a, b);

	// The flag passed to BindToRegister is set exactly when the destination
	// is also one of the two source operands.
	const bool dest_is_source = (d == a) || (d == b);
	fpr.BindToRegister(d, dest_is_source);

	// Host register now bound to d; nothing below rebinds it.
	const X64Reg dest = fpr.RX(d);

	if (!roundRHS)
	{
		avx_op(avxOp, sseOp, dest, fpr.R(a), fpr.R(b), true, reversible);
	}
	else if (d == a)
	{
		// dest already holds a: put the rounded b in XMM0 (XMM1 is handed to
		// Force25BitPrecision as its third argument) and fold it into dest.
		Force25BitPrecision(XMM0, fpr.R(b), XMM1);
		(this->*sseOp)(dest, R(XMM0));
	}
	else
	{
		// Round b straight into dest, then apply the op with a as the source.
		// NOTE(review): this applies the op as (rounded b) <op> a, i.e. with
		// the operands swapped; the only caller visible here that passes
		// roundRHS is the multiply case - confirm no non-commutative user.
		Force25BitPrecision(dest, fpr.R(b), XMM0);
		(this->*sseOp)(dest, fpr.R(a));
	}

	ForceSinglePrecision(dest, fpr.R(d));
	SetFPRFIfNeeded(dest);
	fpr.UnlockAll();
}
void Jit64::ps_arith(UGeckoInstruction inst) void Jit64::ps_arith(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
@ -94,16 +66,16 @@ void Jit64::ps_arith(UGeckoInstruction inst)
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 18: // div case 18: // div
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::VDIVPD, &XEmitter::DIVPD, inst); fp_tri_op(inst.FD, inst.FA, inst.FB, false, true, &XEmitter::VDIVPD, &XEmitter::DIVPD, true);
break; break;
case 20: // sub case 20: // sub
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::VSUBPD, &XEmitter::SUBPD, inst); fp_tri_op(inst.FD, inst.FA, inst.FB, false, true, &XEmitter::VSUBPD, &XEmitter::SUBPD, true);
break; break;
case 21: // add case 21: // add
tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::VADDPD, &XEmitter::ADDPD, inst); fp_tri_op(inst.FD, inst.FA, inst.FB, true, true, &XEmitter::VADDPD, &XEmitter::ADDPD, true);
break; break;
case 25: // mul case 25: // mul
tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::VMULPD, &XEmitter::MULPD, inst, round_input); fp_tri_op(inst.FD, inst.FA, inst.FC, true, true, &XEmitter::VMULPD, &XEmitter::MULPD, true, round_input);
break; break;
default: default:
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!"); _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");