diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 9d99dd0a75..4723c1c2cf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -282,7 +282,9 @@ void Jit64::fp_arith(UGeckoInstruction inst) RCOpArg Rarg2 = fpr.Use(arg2, RCMode::Read); RegCache::Realize(Rd, Ra, Rarg2); - X64Reg dest = preserve_inputs ? XMM1 : static_cast(Rd); + X64Reg dest = X64Reg(Rd); + if (preserve_inputs && (a == d || arg2 == d)) + dest = XMM1; if (round_rhs) { if (a == d && !preserve_inputs) @@ -298,7 +300,33 @@ void Jit64::fp_arith(UGeckoInstruction inst) } else { - avx_op(avxOp, sseOp, dest, Ra, Rarg2, packed, reversible); + if (Ra.IsSimpleReg(dest)) + { + (this->*sseOp)(dest, Rarg2); + } + else if (reversible && Rarg2.IsSimpleReg(dest)) + { + (this->*sseOp)(dest, Ra); + } + else if (cpu_info.bAVX && Ra.IsSimpleReg()) + { + (this->*avxOp)(dest, Ra.GetSimpleReg(), Rarg2); + } + else if (cpu_info.bAVX && reversible && Rarg2.IsSimpleReg()) + { + (this->*avxOp)(dest, Rarg2.GetSimpleReg(), Ra); + } + else + { + if (Rarg2.IsSimpleReg(dest)) + dest = XMM1; + + if (packed) + MOVAPD(dest, Ra); + else + MOVSD(dest, Ra); + (this->*sseOp)(dest, a == arg2 ? R(dest) : Rarg2); + } } switch (inst.SUBOP5) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 9ea80d4d06..02c8ba1893 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -748,36 +748,19 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), { (this->*sseOp)(regOp, arg2); } - else if (arg1.IsSimpleReg() && cpu_info.bAVX) + else if (reversible && arg2.IsSimpleReg(regOp)) + { + (this->*sseOp)(regOp, arg1); + } + else if (cpu_info.bAVX && arg1.IsSimpleReg()) { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2); } - else if (arg2.IsSimpleReg(regOp)) + else if (cpu_info.bAVX && reversible && arg2.IsSimpleReg()) { - if (reversible) - { - (this->*sseOp)(regOp, arg1); - } - else - { - // The ugly case: regOp == arg2 without AVX, or with arg1 == memory - if (!arg1.IsSimpleReg(XMM0)) - MOVAPD(XMM0, arg1); - if (cpu_info.bAVX) - { - (this->*avxOp)(regOp, XMM0, arg2); - } - else - { - (this->*sseOp)(XMM0, arg2); - if (packed) - MOVAPD(regOp, R(XMM0)); - else - MOVSD(regOp, R(XMM0)); - } - } + (this->*avxOp)(regOp, arg2.GetSimpleReg(), arg1); } - else + else if (!arg2.IsSimpleReg(regOp)) { if (packed) MOVAPD(regOp, arg1); @@ -785,6 +768,32 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), MOVSD(regOp, arg1); (this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2); } + else if (reversible && !arg1.IsSimpleReg(regOp)) + { + if (packed) + MOVAPD(regOp, arg2); + else + MOVSD(regOp, arg2); + (this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg1); + } + else + { + // The ugly case: Not reversible, and we have regOp == arg2 without AVX or with arg1 == memory + if (!arg1.IsSimpleReg(XMM0)) + MOVAPD(XMM0, arg1); + if (cpu_info.bAVX) + { + (this->*avxOp)(regOp, XMM0, arg2); + } + else + { + (this->*sseOp)(XMM0, arg2); + if (packed) + MOVAPD(regOp, R(XMM0)); + else + MOVSD(regOp, R(XMM0)); + } + } } // Abstract between AVX and SSE: automatically handle 3-operand instructions @@ -796,11 +805,16 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, { (this->*sseOp)(regOp, arg2, imm); } - else if (arg1.IsSimpleReg() && cpu_info.bAVX) + else if (cpu_info.bAVX && arg1.IsSimpleReg()) { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2, imm); } - else if (arg2.IsSimpleReg(regOp)) + else if (!arg2.IsSimpleReg(regOp)) + { + MOVAPD(regOp, arg1); + (this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2, imm); + } + else { // The ugly case: regOp == arg2 without AVX, or with arg1 == memory if (!arg1.IsSimpleReg(XMM0)) @@ -816,11 +830,6 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, MOVAPD(regOp, R(XMM0)); } } - else - { - MOVAPD(regOp, arg1); - (this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2, imm); - } } alignas(16) static const u64 psMantissaTruncate[2] = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL};