From bb3306701b0fb93d969ba29532039d3cdc30c919 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 8 Oct 2022 22:42:07 +0200 Subject: [PATCH 1/4] Jit64: Flatten avx_op Reduces indentation and places the "ugly" case last. No behavior change. --- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 9ea80d4d06..bfe20ab458 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -752,32 +752,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2); } - else if (arg2.IsSimpleReg(regOp)) - { - if (reversible) - { - (this->*sseOp)(regOp, arg1); - } - else - { - // The ugly case: regOp == arg2 without AVX, or with arg1 == memory - if (!arg1.IsSimpleReg(XMM0)) - MOVAPD(XMM0, arg1); - if (cpu_info.bAVX) - { - (this->*avxOp)(regOp, XMM0, arg2); - } - else - { - (this->*sseOp)(XMM0, arg2); - if (packed) - MOVAPD(regOp, R(XMM0)); - else - MOVSD(regOp, R(XMM0)); - } - } - } - else + else if (!arg2.IsSimpleReg(regOp)) { if (packed) MOVAPD(regOp, arg1); @@ -785,6 +760,28 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), MOVSD(regOp, arg1); (this->*sseOp)(regOp, arg1 == arg2 ? 
R(regOp) : arg2); } + else if (reversible) + { + (this->*sseOp)(regOp, arg1); + } + else + { + // The ugly case: regOp == arg2 without AVX, or with arg1 == memory + if (!arg1.IsSimpleReg(XMM0)) + MOVAPD(XMM0, arg1); + if (cpu_info.bAVX) + { + (this->*avxOp)(regOp, XMM0, arg2); + } + else + { + (this->*sseOp)(XMM0, arg2); + if (packed) + MOVAPD(regOp, R(XMM0)); + else + MOVSD(regOp, R(XMM0)); + } + } } // Abstract between AVX and SSE: automatically handle 3-operand instructions @@ -800,7 +797,12 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2, imm); } - else if (arg2.IsSimpleReg(regOp)) + else if (!arg2.IsSimpleReg(regOp)) + { + MOVAPD(regOp, arg1); + (this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2, imm); + } + else { // The ugly case: regOp == arg2 without AVX, or with arg1 == memory if (!arg1.IsSimpleReg(XMM0)) @@ -816,11 +818,6 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, MOVAPD(regOp, R(XMM0)); } } - else - { - MOVAPD(regOp, arg1); - (this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2, imm); - } } alignas(16) static const u64 psMantissaTruncate[2] = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL}; From 2c2e06bf39318bfafd76201407d0e346f8a045fb Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 4 Dec 2022 11:37:49 +0100 Subject: [PATCH 2/4] Jit64: Add extra cases for reversible avx_op Optimization. 
--- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index bfe20ab458..02c8ba1893 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -748,10 +748,18 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), { (this->*sseOp)(regOp, arg2); } - else if (arg1.IsSimpleReg() && cpu_info.bAVX) + else if (reversible && arg2.IsSimpleReg(regOp)) + { + (this->*sseOp)(regOp, arg1); + } + else if (cpu_info.bAVX && arg1.IsSimpleReg()) { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2); } + else if (cpu_info.bAVX && reversible && arg2.IsSimpleReg()) + { + (this->*avxOp)(regOp, arg2.GetSimpleReg(), arg1); + } else if (!arg2.IsSimpleReg(regOp)) { if (packed) @@ -760,13 +768,17 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), MOVSD(regOp, arg1); (this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2); } - else if (reversible) + else if (reversible && !arg1.IsSimpleReg(regOp)) { - (this->*sseOp)(regOp, arg1); + if (packed) + MOVAPD(regOp, arg2); + else + MOVSD(regOp, arg2); + (this->*sseOp)(regOp, arg1 == arg2 ? 
R(regOp) : arg1); } else { - // The ugly case: regOp == arg2 without AVX, or with arg1 == memory + // The ugly case: Not reversible, and we have regOp == arg2 without AVX or with arg1 == memory if (!arg1.IsSimpleReg(XMM0)) MOVAPD(XMM0, arg1); if (cpu_info.bAVX) @@ -793,7 +805,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, { (this->*sseOp)(regOp, arg2, imm); } - else if (arg1.IsSimpleReg() && cpu_info.bAVX) + else if (cpu_info.bAVX && arg1.IsSimpleReg()) { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2, imm); } From 2bb59ff0dca59cbd21f1a78bc4dc8d315264d7da Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 8 Oct 2022 22:08:50 +0200 Subject: [PATCH 3/4] Jit64: Inline avx_op into fp_arith This will let us manage registers better in the next commit. --- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 9d99dd0a75..8deabd3b00 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -298,7 +298,56 @@ void Jit64::fp_arith(UGeckoInstruction inst) } else { - avx_op(avxOp, sseOp, dest, Ra, Rarg2, packed, reversible); + if (Ra.IsSimpleReg(dest)) + { + (this->*sseOp)(dest, Rarg2); + } + else if (reversible && Rarg2.IsSimpleReg(dest)) + { + (this->*sseOp)(dest, Ra); + } + else if (cpu_info.bAVX && Ra.IsSimpleReg()) + { + (this->*avxOp)(dest, Ra.GetSimpleReg(), Rarg2); + } + else if (cpu_info.bAVX && reversible && Rarg2.IsSimpleReg()) + { + (this->*avxOp)(dest, Rarg2.GetSimpleReg(), Ra); + } + else if (!Rarg2.IsSimpleReg(dest)) + { + if (packed) + MOVAPD(dest, Ra); + else + MOVSD(dest, Ra); + (this->*sseOp)(dest, OpArg(Ra) == OpArg(Rarg2) ? 
R(dest) : Rarg2); + } + else if (reversible && !Ra.IsSimpleReg(dest)) + { + if (packed) + MOVAPD(dest, Rarg2); + else + MOVSD(dest, Rarg2); + (this->*sseOp)(dest, OpArg(Ra) == OpArg(Rarg2) ? R(dest) : Ra); + } + else + { + // The ugly case: Not reversible, and we have dest == Rarg2 without AVX or with Ra == memory + if (!Ra.IsSimpleReg(XMM0)) + MOVAPD(XMM0, Ra); + if (cpu_info.bAVX) + { + (this->*avxOp)(dest, XMM0, Rarg2); + } + else + { + (this->*sseOp)(XMM0, Rarg2); + if (packed) + MOVAPD(dest, R(XMM0)); + else + MOVSD(dest, R(XMM0)); + } + } } switch (inst.SUBOP5) From 13e9d5c889cea23228c57676bd964e788824c547 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 8 Oct 2022 22:36:14 +0200 Subject: [PATCH 4/4] Jit64: Improve register handling in fp_arith This lets us avoid the "ugly" case. --- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 37 ++++--------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 8deabd3b00..4723c1c2cf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -282,7 +282,9 @@ void Jit64::fp_arith(UGeckoInstruction inst) RCOpArg Rarg2 = fpr.Use(arg2, RCMode::Read); RegCache::Realize(Rd, Ra, Rarg2); - X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd); + X64Reg dest = X64Reg(Rd); + if (preserve_inputs && (a == d || arg2 == d)) + dest = XMM1; if (round_rhs) { if (a == d && !preserve_inputs) @@ -314,39 +316,16 @@ void Jit64::fp_arith(UGeckoInstruction inst) { (this->*avxOp)(dest, Rarg2.GetSimpleReg(), Ra); } - else if (!Rarg2.IsSimpleReg(dest)) + else { + if (Rarg2.IsSimpleReg(dest)) + dest = XMM1; + if (packed) MOVAPD(dest, Ra); else MOVSD(dest, Ra); - (this->*sseOp)(dest, OpArg(Ra) == OpArg(Rarg2) ? 
R(dest) : Rarg2); - } - else if (reversible && !Ra.IsSimpleReg(dest)) - { - if (packed) - MOVAPD(dest, Rarg2); - else - MOVSD(dest, Rarg2); - (this->*sseOp)(dest, OpArg(Ra) == OpArg(Rarg2) ? R(dest) : Ra); - } - else - { - // The ugly case: Not reversible, and we have dest == Rarg2 without AVX or with Ra == memory - if (!Ra.IsSimpleReg(XMM0)) - MOVAPD(XMM0, Ra); - if (cpu_info.bAVX) - { - (this->*avxOp)(dest, XMM0, Rarg2); - } - else - { - (this->*sseOp)(XMM0, Rarg2); - if (packed) - MOVAPD(dest, R(XMM0)); - else - MOVSD(dest, R(XMM0)); - } + (this->*sseOp)(dest, a == arg2 ? R(dest) : Rarg2); } }