From 19713f7c14694d3714b89ab42d3b104b2eafad75 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 10 Feb 2016 15:41:14 +0100 Subject: [PATCH] JitArm64: Merge scalar 4-operand instructions. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 8 - .../JitArm64/JitArm64_FloatingPoint.cpp | 201 +++--------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 16 +- 3 files changed, 37 insertions(+), 188 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index cd070d1feb..3c70a716ae 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -139,17 +139,9 @@ public: // Floating point void fp_arith(UGeckoInstruction inst); void fabsx(UGeckoInstruction inst); - void fmaddsx(UGeckoInstruction inst); - void fmaddx(UGeckoInstruction inst); void fmrx(UGeckoInstruction inst); - void fmsubsx(UGeckoInstruction inst); - void fmsubx(UGeckoInstruction inst); void fnabsx(UGeckoInstruction inst); void fnegx(UGeckoInstruction inst); - void fnmaddsx(UGeckoInstruction inst); - void fnmaddx(UGeckoInstruction inst); - void fnmsubsx(UGeckoInstruction inst); - void fnmsubx(UGeckoInstruction inst); void fselx(UGeckoInstruction inst); void fcmpX(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 1062e302c2..52af9af6a6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -37,40 +37,55 @@ void JitArm64::fp_arith(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - u32 a = inst.FA, d = inst.FD; - u32 b = inst.SUBOP5 == 25 ? 
inst.FC : inst.FB; + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + u32 op5 = inst.SUBOP5; bool single = inst.OPCD == 59; bool packed = inst.OPCD == 4; + bool use_c = op5 >= 25; // fmul and all kind of fmaddXX + bool use_b = op5 != 25; // fmul uses no B + + ARM64Reg VA, VB, VC, VD; + if (packed) { - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + VA = fpr.R(a, REG_REG); + if (use_b) + VB = fpr.R(b, REG_REG); + if (use_c) + VC = fpr.R(c, REG_REG); + VD = fpr.RW(d, REG_REG); - switch (inst.SUBOP5) + switch (op5) { case 18: m_float_emit.FDIV(64, VD, VA, VB); break; case 20: m_float_emit.FSUB(64, VD, VA, VB); break; case 21: m_float_emit.FADD(64, VD, VA, VB); break; - case 25: m_float_emit.FMUL(64, VD, VA, VB); break; - default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + case 25: m_float_emit.FMUL(64, VD, VA, VC); break; + default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break; } } else { - ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); - ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); - ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR)); + VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); + if (use_b) + VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); + if (use_c) + VC = EncodeRegToDouble(fpr.R(c, REG_IS_LOADED)); + VD = EncodeRegToDouble(fpr.RW(d, single ? 
REG_DUP : REG_LOWER_PAIR)); - switch (inst.SUBOP5) + switch (op5) { case 18: m_float_emit.FDIV(VD, VA, VB); break; case 20: m_float_emit.FSUB(VD, VA, VB); break; case 21: m_float_emit.FADD(VD, VA, VB); break; - case 25: m_float_emit.FMUL(VD, VA, VB); break; - default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + case 25: m_float_emit.FMUL(VD, VA, VC); break; + case 28: m_float_emit.FNMSUB(VD, VA, VC, VB); break; // fmsub: "D = A*C - B" vs "Vd = (-Va) + Vn*Vm" + case 29: m_float_emit.FMADD(VD, VA, VC, VB); break; // fmadd: "D = A*C + B" vs "Vd = Va + Vn*Vm" + case 30: m_float_emit.FMSUB(VD, VA, VC, VB); break; // fnmsub: "D = -(A*C - B)" vs "Vd = Va + (-Vn)*Vm" + case 31: m_float_emit.FNMADD(VD, VA, VC, VB); break; // fnmadd: "D = -(A*C + B)" vs "Vd = (-Va) + (-Vn)*Vm" + default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break; } } @@ -78,45 +93,6 @@ void JitArm64::fp_arith(UGeckoInstruction inst) fpr.FixSinglePrecision(d); } -void JitArm64::fmaddsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fmaddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, 
REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - void JitArm64::fmrx(UGeckoInstruction inst) { INSTRUCTION_START @@ -131,45 +107,6 @@ void JitArm64::fmrx(UGeckoInstruction inst) m_float_emit.INS(64, VD, 0, VB, 0); } -void JitArm64::fmsubsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fmsubx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FNMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - void JitArm64::fnabsx(UGeckoInstruction inst) { INSTRUCTION_START @@ -199,86 +136,6 @@ void JitArm64::fnegx(UGeckoInstruction inst) m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); } -void JitArm64::fnmaddsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; 
- - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fnmaddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FNMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - -void JitArm64::fnmsubsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fnmsubx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = 
inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - void JitArm64::fselx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index bb7cc8fe6c..22752577e0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -318,10 +318,10 @@ static GekkoOPTemplate table59[] = {21, &JitArm64::fp_arith}, // faddsx {24, &JitArm64::FallBackToInterpreter}, // fresx {25, &JitArm64::fp_arith}, // fmulsx - {28, &JitArm64::fmsubsx}, // fmsubsx - {29, &JitArm64::fmaddsx}, // fmaddsx - {30, &JitArm64::fnmsubsx}, // fnmsubsx - {31, &JitArm64::fnmaddsx}, // fnmaddsx + {28, &JitArm64::fp_arith}, // fmsubsx + {29, &JitArm64::fp_arith}, // fmaddsx + {30, &JitArm64::fp_arith}, // fnmsubsx + {31, &JitArm64::fp_arith}, // fnmaddsx }; static GekkoOPTemplate table63[] = @@ -352,10 +352,10 @@ static GekkoOPTemplate table63_2[] = {23, &JitArm64::fselx}, // fselx {25, &JitArm64::fp_arith}, // fmulx {26, &JitArm64::FallBackToInterpreter}, // frsqrtex - {28, &JitArm64::fmsubx}, // fmsubx - {29, &JitArm64::fmaddx}, // fmaddx - {30, &JitArm64::fnmsubx}, // fnmsubx - {31, &JitArm64::fnmaddx}, // fnmaddx + {28, &JitArm64::fp_arith}, // fmsubx + {29, &JitArm64::fp_arith}, // fmaddx + {30, &JitArm64::fp_arith}, // fnmsubx + {31, &JitArm64::fp_arith}, // fnmaddx };