From 83eb1d8c31714e7b517ebd02ce92323864e01600 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 10 Feb 2016 14:49:06 +0100 Subject: [PATCH 01/10] JitArm64: Merge 3 way FP instructions. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 9 +- .../JitArm64/JitArm64_FloatingPoint.cpp | 146 ++++-------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 16 +- 3 files changed, 35 insertions(+), 136 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 7ad9464e05..1016ddec9f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -137,16 +137,13 @@ public: void stfXX(UGeckoInstruction inst); // Floating point + void fp_arith(UGeckoInstruction inst); void fabsx(UGeckoInstruction inst); - void faddsx(UGeckoInstruction inst); - void faddx(UGeckoInstruction inst); void fmaddsx(UGeckoInstruction inst); void fmaddx(UGeckoInstruction inst); void fmrx(UGeckoInstruction inst); void fmsubsx(UGeckoInstruction inst); void fmsubx(UGeckoInstruction inst); - void fmulsx(UGeckoInstruction inst); - void fmulx(UGeckoInstruction inst); void fnabsx(UGeckoInstruction inst); void fnegx(UGeckoInstruction inst); void fnmaddsx(UGeckoInstruction inst); @@ -154,13 +151,9 @@ public: void fnmsubsx(UGeckoInstruction inst); void fnmsubx(UGeckoInstruction inst); void fselx(UGeckoInstruction inst); - void fsubsx(UGeckoInstruction inst); - void fsubx(UGeckoInstruction inst); void fcmpX(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); void fctiwzx(UGeckoInstruction inst); - void fdivx(UGeckoInstruction inst); - void fdivsx(UGeckoInstruction inst); // Paired void ps_abs(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 5ad2bf8321..e30a3ca3aa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -30,37 +30,42 @@ void JitArm64::fabsx(UGeckoInstruction inst) m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); } -void JitArm64::faddsx(UGeckoInstruction inst) +void JitArm64::fp_arith(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - u32 a = inst.FA, b = inst.FB, d = inst.FD; + u32 a = inst.FA, d = inst.FD; + u32 b = inst.SUBOP5 == 25 ? inst.FC : inst.FB; - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); + bool single = inst.OPCD == 4 || inst.OPCD == 59; - m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); - fpr.FixSinglePrecision(d); -} + ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); + ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); + ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? 
REG_DUP : REG_LOWER_PAIR)); -void JitArm64::faddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); + switch (inst.SUBOP5) + { + case 18: + m_float_emit.FDIV(VD, VA, VB); + break; + case 20: + m_float_emit.FSUB(VD, VA, VB); + break; + case 21: + m_float_emit.FADD(VD, VA, VB); + break; + case 25: + m_float_emit.FMUL(VD, VA, VB); + break; + default: + _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + } - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); + if (single) + fpr.FixSinglePrecision(d); } void JitArm64::fmaddsx(UGeckoInstruction inst) @@ -155,39 +160,6 @@ void JitArm64::fmsubx(UGeckoInstruction inst) m_float_emit.FNMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); } -void JitArm64::fmulsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - - m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - fpr.FixSinglePrecision(d); -} - -void JitArm64::fmulx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); -} - void 
JitArm64::fnabsx(UGeckoInstruction inst) { INSTRUCTION_START @@ -314,39 +286,6 @@ void JitArm64::fselx(UGeckoInstruction inst) m_float_emit.FCSEL(EncodeRegToDouble(VD), EncodeRegToDouble(VC), EncodeRegToDouble(VB), CC_GE); } -void JitArm64::fsubsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - - m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); - fpr.FixSinglePrecision(d); -} - -void JitArm64::fsubx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); -} - void JitArm64::frspx(UGeckoInstruction inst) { INSTRUCTION_START @@ -448,36 +387,3 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); fpr.Unlock(V0); } - -void JitArm64::fdivx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); -} - -void JitArm64::fdivsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - 
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - - m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); - fpr.FixSinglePrecision(d); -} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index af5bc38481..0bade60745 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -313,11 +313,11 @@ static GekkoOPTemplate table31[] = static GekkoOPTemplate table59[] = { - {18, &JitArm64::fdivsx}, // fdivsx - {20, &JitArm64::fsubsx}, // fsubsx - {21, &JitArm64::faddsx}, // faddsx + {18, &JitArm64::fp_arith}, // fdivsx + {20, &JitArm64::fp_arith}, // fsubsx + {21, &JitArm64::fp_arith}, // faddsx {24, &JitArm64::FallBackToInterpreter}, // fresx - {25, &JitArm64::fmulsx}, // fmulsx + {25, &JitArm64::fp_arith}, // fmulsx {28, &JitArm64::fmsubsx}, // fmsubsx {29, &JitArm64::fmaddsx}, // fmaddsx {30, &JitArm64::fnmsubsx}, // fnmsubsx @@ -346,11 +346,11 @@ static GekkoOPTemplate table63[] = static GekkoOPTemplate table63_2[] = { - {18, &JitArm64::fdivx}, // fdivx - {20, &JitArm64::fsubx}, // fsubx - {21, &JitArm64::faddx}, // faddx + {18, &JitArm64::fp_arith}, // fdivx + {20, &JitArm64::fp_arith}, // fsubx + {21, &JitArm64::fp_arith}, // faddx {23, &JitArm64::fselx}, // fselx - {25, &JitArm64::fmulx}, // fmulx + {25, &JitArm64::fp_arith}, // fmulx {26, &JitArm64::FallBackToInterpreter}, // frsqrtex {28, &JitArm64::fmsubx}, // fmsubx {29, &JitArm64::fmaddx}, // fmaddx From 9c048bbc361227bbba3b9326484aabab11fb2967 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 10 Feb 2016 15:00:18 +0100 Subject: [PATCH 02/10] JitArm64: Also merge 3 way FP-PS instructions. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 4 -- .../JitArm64/JitArm64_FloatingPoint.cpp | 52 ++++++++------ .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 68 ------------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 8 +-- 4 files changed, 35 insertions(+), 97 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 1016ddec9f..cd070d1feb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -157,8 +157,6 @@ public: // Paired void ps_abs(UGeckoInstruction inst); - void ps_add(UGeckoInstruction inst); - void ps_div(UGeckoInstruction inst); void ps_madd(UGeckoInstruction inst); void ps_madds0(UGeckoInstruction inst); void ps_madds1(UGeckoInstruction inst); @@ -168,7 +166,6 @@ public: void ps_merge11(UGeckoInstruction inst); void ps_mr(UGeckoInstruction inst); void ps_msub(UGeckoInstruction inst); - void ps_mul(UGeckoInstruction inst); void ps_muls0(UGeckoInstruction inst); void ps_muls1(UGeckoInstruction inst); void ps_nabs(UGeckoInstruction inst); @@ -177,7 +174,6 @@ public: void ps_neg(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst); - void ps_sub(UGeckoInstruction inst); void ps_sum0(UGeckoInstruction inst); void ps_sum1(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index e30a3ca3aa..1062e302c2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -40,31 +40,41 @@ void JitArm64::fp_arith(UGeckoInstruction inst) u32 a = inst.FA, d = inst.FD; u32 b = inst.SUBOP5 == 25 ? 
inst.FC : inst.FB; - bool single = inst.OPCD == 4 || inst.OPCD == 59; + bool single = inst.OPCD == 59; + bool packed = inst.OPCD == 4; - ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); - ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); - ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR)); - - switch (inst.SUBOP5) + if (packed) { - case 18: - m_float_emit.FDIV(VD, VA, VB); - break; - case 20: - m_float_emit.FSUB(VD, VA, VB); - break; - case 21: - m_float_emit.FADD(VD, VA, VB); - break; - case 25: - m_float_emit.FMUL(VD, VA, VB); - break; - default: - _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.RW(d, REG_REG); + + switch (inst.SUBOP5) + { + case 18: m_float_emit.FDIV(64, VD, VA, VB); break; + case 20: m_float_emit.FSUB(64, VD, VA, VB); break; + case 21: m_float_emit.FADD(64, VD, VA, VB); break; + case 25: m_float_emit.FMUL(64, VD, VA, VB); break; + default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + } + } + else + { + ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); + ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); + ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? 
REG_DUP : REG_LOWER_PAIR)); + + switch (inst.SUBOP5) + { + case 18: m_float_emit.FDIV(VD, VA, VB); break; + case 20: m_float_emit.FSUB(VD, VA, VB); break; + case 21: m_float_emit.FADD(VD, VA, VB); break; + case 25: m_float_emit.FMUL(VD, VA, VB); break; + default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + } } - if (single) + if (single || packed) fpr.FixSinglePrecision(d); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 8b774978cd..fc84d0d451 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -31,40 +31,6 @@ void JitArm64::ps_abs(UGeckoInstruction inst) m_float_emit.FABS(64, VD, VB); } -void JitArm64::ps_add(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FADD(64, VD, VA, VB); - fpr.FixSinglePrecision(d); -} - -void JitArm64::ps_div(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FDIV(64, VD, VA, VB); - fpr.FixSinglePrecision(d); -} - void JitArm64::ps_madd(UGeckoInstruction inst) { INSTRUCTION_START @@ -223,23 +189,6 @@ void JitArm64::ps_mr(UGeckoInstruction inst) m_float_emit.ORR(VD, VB, VB); } -void JitArm64::ps_mul(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; - - 
ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FMUL(64, VD, VA, VC); - fpr.FixSinglePrecision(d); -} - void JitArm64::ps_muls0(UGeckoInstruction inst) { INSTRUCTION_START @@ -421,23 +370,6 @@ void JitArm64::ps_sel(UGeckoInstruction inst) } } -void JitArm64::ps_sub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FSUB(64, VD, VA, VB); - fpr.FixSinglePrecision(d); -} - void JitArm64::ps_sum0(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 0bade60745..bb7cc8fe6c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -126,12 +126,12 @@ static GekkoOPTemplate table4_2[] = {13, &JitArm64::ps_muls1}, // ps_muls1 {14, &JitArm64::ps_madds0}, // ps_madds0 {15, &JitArm64::ps_madds1}, // ps_madds1 - {18, &JitArm64::ps_div}, // ps_div - {20, &JitArm64::ps_sub}, // ps_sub - {21, &JitArm64::ps_add}, // ps_add + {18, &JitArm64::fp_arith}, // ps_div + {20, &JitArm64::fp_arith}, // ps_sub + {21, &JitArm64::fp_arith}, // ps_add {23, &JitArm64::ps_sel}, // ps_sel {24, &JitArm64::ps_res}, // ps_res - {25, &JitArm64::ps_mul}, // ps_mul + {25, &JitArm64::fp_arith}, // ps_mul {26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte {28, &JitArm64::ps_msub}, // ps_msub {29, &JitArm64::ps_madd}, // ps_madd From 19713f7c14694d3714b89ab42d3b104b2eafad75 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 10 Feb 2016 15:41:14 +0100 Subject: [PATCH 03/10] JitArm64: Merge scalar 4-operand instructions.
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 8 - .../JitArm64/JitArm64_FloatingPoint.cpp | 201 +++--------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 16 +- 3 files changed, 37 insertions(+), 188 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index cd070d1feb..3c70a716ae 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -139,17 +139,9 @@ public: // Floating point void fp_arith(UGeckoInstruction inst); void fabsx(UGeckoInstruction inst); - void fmaddsx(UGeckoInstruction inst); - void fmaddx(UGeckoInstruction inst); void fmrx(UGeckoInstruction inst); - void fmsubsx(UGeckoInstruction inst); - void fmsubx(UGeckoInstruction inst); void fnabsx(UGeckoInstruction inst); void fnegx(UGeckoInstruction inst); - void fnmaddsx(UGeckoInstruction inst); - void fnmaddx(UGeckoInstruction inst); - void fnmsubsx(UGeckoInstruction inst); - void fnmsubx(UGeckoInstruction inst); void fselx(UGeckoInstruction inst); void fcmpX(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 1062e302c2..52af9af6a6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -37,40 +37,55 @@ void JitArm64::fp_arith(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - u32 a = inst.FA, d = inst.FD; - u32 b = inst.SUBOP5 == 25 ? 
inst.FC : inst.FB; + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + u32 op5 = inst.SUBOP5; bool single = inst.OPCD == 59; bool packed = inst.OPCD == 4; + bool use_c = op5 >= 25; // fmul and all kind of fmaddXX + bool use_b = op5 != 25; // fmul uses no B + + ARM64Reg VA, VB, VC, VD; + if (packed) { - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + VA = fpr.R(a, REG_REG); + if (use_b) + VB = fpr.R(b, REG_REG); + if (use_c) + VC = fpr.R(c, REG_REG); + VD = fpr.RW(d, REG_REG); - switch (inst.SUBOP5) + switch (op5) { case 18: m_float_emit.FDIV(64, VD, VA, VB); break; case 20: m_float_emit.FSUB(64, VD, VA, VB); break; case 21: m_float_emit.FADD(64, VD, VA, VB); break; - case 25: m_float_emit.FMUL(64, VD, VA, VB); break; - default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + case 25: m_float_emit.FMUL(64, VD, VA, VC); break; + default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break; } } else { - ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); - ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); - ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR)); + VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); + if (use_b) + VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); + if (use_c) + VC = EncodeRegToDouble(fpr.R(c, REG_IS_LOADED)); + VD = EncodeRegToDouble(fpr.RW(d, single ? 
REG_DUP : REG_LOWER_PAIR)); - switch (inst.SUBOP5) + switch (op5) { case 18: m_float_emit.FDIV(VD, VA, VB); break; case 20: m_float_emit.FSUB(VD, VA, VB); break; case 21: m_float_emit.FADD(VD, VA, VB); break; - case 25: m_float_emit.FMUL(VD, VA, VB); break; - default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + case 25: m_float_emit.FMUL(VD, VA, VC); break; + case 28: m_float_emit.FNMSUB(VD, VA, VC, VB); break; // fmsub: "D = A*C - B" vs "Vd = (-Va) + Vn*Vm" + case 29: m_float_emit.FMADD(VD, VA, VC, VB); break; // fmadd: "D = A*C + B" vs "Vd = Va + Vn*Vm" + case 30: m_float_emit.FMSUB(VD, VA, VC, VB); break; // fnmsub: "D = -(A*C - B)" vs "Vd = Va + (-Vn)*Vm" + case 31: m_float_emit.FNMADD(VD, VA, VC, VB); break; // fnmadd: "D = -(A*C + B)" vs "Vd = (-Va) + (-Vn)*Vm" + default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break; } } @@ -78,45 +93,6 @@ void JitArm64::fp_arith(UGeckoInstruction inst) fpr.FixSinglePrecision(d); } -void JitArm64::fmaddsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fmaddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, 
REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - void JitArm64::fmrx(UGeckoInstruction inst) { INSTRUCTION_START @@ -131,45 +107,6 @@ void JitArm64::fmrx(UGeckoInstruction inst) m_float_emit.INS(64, VD, 0, VB, 0); } -void JitArm64::fmsubsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fmsubx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FNMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - void JitArm64::fnabsx(UGeckoInstruction inst) { INSTRUCTION_START @@ -199,86 +136,6 @@ void JitArm64::fnegx(UGeckoInstruction inst) m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); } -void JitArm64::fnmaddsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; 
- - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fnmaddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FNMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - -void JitArm64::fnmsubsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); - m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::fnmsubx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = 
inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB)); -} - void JitArm64::fselx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index bb7cc8fe6c..22752577e0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -318,10 +318,10 @@ static GekkoOPTemplate table59[] = {21, &JitArm64::fp_arith}, // faddsx {24, &JitArm64::FallBackToInterpreter}, // fresx {25, &JitArm64::fp_arith}, // fmulsx - {28, &JitArm64::fmsubsx}, // fmsubsx - {29, &JitArm64::fmaddsx}, // fmaddsx - {30, &JitArm64::fnmsubsx}, // fnmsubsx - {31, &JitArm64::fnmaddsx}, // fnmaddsx + {28, &JitArm64::fp_arith}, // fmsubsx + {29, &JitArm64::fp_arith}, // fmaddsx + {30, &JitArm64::fp_arith}, // fnmsubsx + {31, &JitArm64::fp_arith}, // fnmaddsx }; static GekkoOPTemplate table63[] = @@ -352,10 +352,10 @@ static GekkoOPTemplate table63_2[] = {23, &JitArm64::fselx}, // fselx {25, &JitArm64::fp_arith}, // fmulx {26, &JitArm64::FallBackToInterpreter}, // frsqrtex - {28, &JitArm64::fmsubx}, // fmsubx - {29, &JitArm64::fmaddx}, // fmaddx - {30, &JitArm64::fnmsubx}, // fnmsubx - {31, &JitArm64::fnmaddx}, // fnmaddx + {28, &JitArm64::fp_arith}, // fmsubx + {29, &JitArm64::fp_arith}, // fmaddx + {30, &JitArm64::fp_arith}, // fnmsubx + {31, &JitArm64::fp_arith}, // fnmaddx }; From 1b6d9dfc4e976f7e34e0133f8a4f339375770394 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 00:09:04 +0100 Subject: [PATCH 04/10] JitArm64: Merge ps_maddsX. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 +-- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 27 +++---------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 4 +-- 3 files changed, 6 insertions(+), 28 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 3c70a716ae..7e7ff5ad38 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -150,8 +150,7 @@ public: // Paired void ps_abs(UGeckoInstruction inst); void ps_madd(UGeckoInstruction inst); - void ps_madds0(UGeckoInstruction inst); - void ps_madds1(UGeckoInstruction inst); + void ps_maddsX(UGeckoInstruction inst); void ps_merge00(UGeckoInstruction inst); void ps_merge01(UGeckoInstruction inst); void ps_merge10(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index fc84d0d451..4688231677 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -53,7 +53,7 @@ void JitArm64::ps_madd(UGeckoInstruction inst) fpr.Unlock(V0); } -void JitArm64::ps_madds0(UGeckoInstruction inst) +void JitArm64::ps_maddsX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); @@ -62,28 +62,7 @@ void JitArm64::ps_madds0(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VC, 0); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_madds1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = 
inst.FB, c = inst.FC, d = inst.FD; + bool upper = inst.SUBOP5 == 15; ARM64Reg VA = fpr.R(a, REG_REG); ARM64Reg VB = fpr.R(b, REG_REG); @@ -91,7 +70,7 @@ void JitArm64::ps_madds1(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.DUP(64, V0, VC, 1); + m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); m_float_emit.FMUL(64, V0, V0, VA); m_float_emit.FADD(64, VD, V0, VB); fpr.FixSinglePrecision(d); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 22752577e0..29e3cb4aff 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -124,8 +124,8 @@ static GekkoOPTemplate table4_2[] = {11, &JitArm64::ps_sum1}, // ps_sum1 {12, &JitArm64::ps_muls0}, // ps_muls0 {13, &JitArm64::ps_muls1}, // ps_muls1 - {14, &JitArm64::ps_madds0}, // ps_madds0 - {15, &JitArm64::ps_madds1}, // ps_madds1 + {14, &JitArm64::ps_maddsX}, // ps_madds0 + {15, &JitArm64::ps_maddsX}, // ps_madds1 {18, &JitArm64::fp_arith}, // ps_div {20, &JitArm64::fp_arith}, // ps_sub {21, &JitArm64::fp_arith}, // ps_add From 09af32c0636f0e65d711ee1ac46997f87e8e5ff4 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 00:13:59 +0100 Subject: [PATCH 05/10] JitArm64: Merge ps_mergeXX. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 5 +- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 88 +++++++------------ .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 8 +- 3 files changed, 35 insertions(+), 66 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 7e7ff5ad38..c4b5df628f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -151,10 +151,7 @@ public: void ps_abs(UGeckoInstruction inst); void ps_madd(UGeckoInstruction inst); void ps_maddsX(UGeckoInstruction inst); - void ps_merge00(UGeckoInstruction inst); - void ps_merge01(UGeckoInstruction inst); - void ps_merge10(UGeckoInstruction inst); - void ps_merge11(UGeckoInstruction inst); + void ps_mergeXX(UGeckoInstruction inst); void ps_mr(UGeckoInstruction inst); void ps_msub(UGeckoInstruction inst); void ps_muls0(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 4688231677..a516a22c87 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -78,7 +78,7 @@ void JitArm64::ps_maddsX(UGeckoInstruction inst) fpr.Unlock(V0); } -void JitArm64::ps_merge00(UGeckoInstruction inst) +void JitArm64::ps_mergeXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); @@ -90,65 +90,37 @@ void JitArm64::ps_merge00(UGeckoInstruction inst) ARM64Reg VB = fpr.R(b, REG_REG); ARM64Reg VD = fpr.RW(d, REG_REG); - m_float_emit.TRN1(64, VD, VA, VB); -} - -void JitArm64::ps_merge01(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.INS(64, VD, 0, VA, 0); - m_float_emit.INS(64, VD, 1, VB, 1); -} - -void 
JitArm64::ps_merge10(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - if (d != a && d != b) + switch (inst.SUBOP10) { - m_float_emit.INS(64, VD, 0, VA, 1); - m_float_emit.INS(64, VD, 1, VB, 0); + case 528: //00 + m_float_emit.TRN1(64, VD, VA, VB); + break; + case 560: //01 + m_float_emit.INS(64, VD, 0, VA, 0); + m_float_emit.INS(64, VD, 1, VB, 1); + break; + case 592: //10 + if (d != a && d != b) + { + m_float_emit.INS(64, VD, 0, VA, 1); + m_float_emit.INS(64, VD, 1, VB, 0); + } + else + { + ARM64Reg V0 = fpr.GetReg(); + m_float_emit.INS(64, V0, 0, VA, 1); + m_float_emit.INS(64, V0, 1, VB, 0); + m_float_emit.ORR(VD, V0, V0); + fpr.Unlock(V0); + } + break; + case 624: //11 + m_float_emit.TRN2(64, VD, VA, VB); + break; + default: + _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); + break; } - else - { - ARM64Reg V0 = fpr.GetReg(); - m_float_emit.INS(64, V0, 0, VA, 1); - m_float_emit.INS(64, V0, 1, VB, 0); - m_float_emit.ORR(VD, V0, V0); - fpr.Unlock(V0); - } -} - -void JitArm64::ps_merge11(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.TRN2(64, VD, VA, VB); } void JitArm64::ps_mr(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 29e3cb4aff..49c8967dfd 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -110,10 +110,10 @@ static GekkoOPTemplate table4[] = {64, &JitArm64::FallBackToInterpreter}, // ps_cmpu1 {72, &JitArm64::ps_mr}, // ps_mr {96, 
&JitArm64::FallBackToInterpreter}, // ps_cmpo1 - {528, &JitArm64::ps_merge00}, // ps_merge00 - {560, &JitArm64::ps_merge01}, // ps_merge01 - {592, &JitArm64::ps_merge10}, // ps_merge10 - {624, &JitArm64::ps_merge11}, // ps_merge11 + {528, &JitArm64::ps_mergeXX}, // ps_merge00 + {560, &JitArm64::ps_mergeXX}, // ps_merge01 + {592, &JitArm64::ps_mergeXX}, // ps_merge10 + {624, &JitArm64::ps_mergeXX}, // ps_merge11 {1014, &JitArm64::FallBackToInterpreter}, // dcbz_l }; From 157404fd1e761db1ea329c7b0a37cef7547d4671 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 00:16:29 +0100 Subject: [PATCH 06/10] JitArm64: Merge ps_mulsX. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 +-- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 24 +++---------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 4 ++-- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index c4b5df628f..cdff0ebdde 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -154,8 +154,7 @@ public: void ps_mergeXX(UGeckoInstruction inst); void ps_mr(UGeckoInstruction inst); void ps_msub(UGeckoInstruction inst); - void ps_muls0(UGeckoInstruction inst); - void ps_muls1(UGeckoInstruction inst); + void ps_mulsX(UGeckoInstruction inst); void ps_nabs(UGeckoInstruction inst); void ps_nmadd(UGeckoInstruction inst); void ps_nmsub(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index a516a22c87..b815f3e593 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -140,7 +140,7 @@ void JitArm64::ps_mr(UGeckoInstruction inst) m_float_emit.ORR(VD, VB, VB); } -void JitArm64::ps_muls0(UGeckoInstruction inst) +void JitArm64::ps_mulsX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); @@ 
-149,32 +149,14 @@ void JitArm64::ps_muls0(UGeckoInstruction inst) u32 a = inst.FA, c = inst.FC, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VC, 0); - m_float_emit.FMUL(64, VD, VA, V0); - fpr.FixSinglePrecision(d); - fpr.Unlock(V0); -} - -void JitArm64::ps_muls1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; + bool upper = inst.SUBOP5 == 13; ARM64Reg VA = fpr.R(a, REG_REG); ARM64Reg VC = fpr.R(c, REG_REG); ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.DUP(64, V0, VC, 1); + m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); m_float_emit.FMUL(64, VD, VA, V0); fpr.FixSinglePrecision(d); fpr.Unlock(V0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 49c8967dfd..f704ffd0c2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -122,8 +122,8 @@ static GekkoOPTemplate table4_2[] = { {10, &JitArm64::ps_sum0}, // ps_sum0 {11, &JitArm64::ps_sum1}, // ps_sum1 - {12, &JitArm64::ps_muls0}, // ps_muls0 - {13, &JitArm64::ps_muls1}, // ps_muls1 + {12, &JitArm64::ps_mulsX}, // ps_muls0 + {13, &JitArm64::ps_mulsX}, // ps_muls1 {14, &JitArm64::ps_maddsX}, // ps_madds0 {15, &JitArm64::ps_maddsX}, // ps_madds1 {18, &JitArm64::fp_arith}, // ps_div From 52f9912c46345b21b20be0795a471a744389d305 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 10 Feb 2016 16:00:20 +0100 Subject: [PATCH 07/10] ArmJit64: Merge FP two operand instructions. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 5 +- .../JitArm64/JitArm64_FloatingPoint.cpp | 54 ++++--------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 8 +-- 3 files changed, 16 insertions(+), 51 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index cdff0ebdde..7bd62b0aab 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -138,10 +138,7 @@ public: // Floating point void fp_arith(UGeckoInstruction inst); - void fabsx(UGeckoInstruction inst); - void fmrx(UGeckoInstruction inst); - void fnabsx(UGeckoInstruction inst); - void fnegx(UGeckoInstruction inst); + void fp_logic(UGeckoInstruction inst); void fselx(UGeckoInstruction inst); void fcmpX(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 52af9af6a6..9d96ed70c9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -17,19 +17,6 @@ using namespace Arm64Gen; -void JitArm64::fabsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); -} - void JitArm64::fp_arith(UGeckoInstruction inst) { INSTRUCTION_START @@ -93,7 +80,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst) fpr.FixSinglePrecision(d); } -void JitArm64::fmrx(UGeckoInstruction inst) +void JitArm64::fp_logic(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); @@ -101,39 +88,20 @@ void JitArm64::fmrx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.INS(64, VD, 0, VB, 
0); -} - -void JitArm64::fnabsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; + u32 op10 = inst.SUBOP10; ARM64Reg VB = fpr.R(b, REG_IS_LOADED); ARM64Reg VD = fpr.RW(d); - m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); -} - -void JitArm64::fnegx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); - - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); + switch (op10) + { + case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; + case 72: m_float_emit.INS(64, VD, 0, VB, 0); break; + case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); + m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break; + case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; + default: _assert_msg_(DYNA_REC, 0, "fp_logic WTF!!!"); + } } void JitArm64::fselx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index f704ffd0c2..7a109ff14a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -326,14 +326,14 @@ static GekkoOPTemplate table59[] = static GekkoOPTemplate table63[] = { - {264, &JitArm64::fabsx}, // fabsx + {264, &JitArm64::fp_logic}, // fabsx {32, &JitArm64::fcmpX}, // fcmpo {0, &JitArm64::fcmpX}, // fcmpu {14, &JitArm64::FallBackToInterpreter}, // fctiwx {15, &JitArm64::fctiwzx}, // fctiwzx - {72, &JitArm64::fmrx}, // fmrx - {136, &JitArm64::fnabsx}, // fnabsx - {40, &JitArm64::fnegx}, // fnegx + {72, &JitArm64::fp_logic}, // fmrx + {136, &JitArm64::fp_logic}, // fnabsx + {40, 
&JitArm64::fp_logic}, // fnegx {12, &JitArm64::frspx}, // frspx {64, &JitArm64::FallBackToInterpreter}, // mcrfs From 2d8e1bc34d1491b3cf39b0a8e0cbad7d2c024e8d Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 00:28:05 +0100 Subject: [PATCH 08/10] JitArm64: Merge PS stuff into fp_logic. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 4 -- .../JitArm64/JitArm64_FloatingPoint.cpp | 43 +++++++++---- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 60 ------------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 8 +-- 4 files changed, 37 insertions(+), 78 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 7bd62b0aab..6c776e2230 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -145,17 +145,13 @@ public: void fctiwzx(UGeckoInstruction inst); // Paired - void ps_abs(UGeckoInstruction inst); void ps_madd(UGeckoInstruction inst); void ps_maddsX(UGeckoInstruction inst); void ps_mergeXX(UGeckoInstruction inst); - void ps_mr(UGeckoInstruction inst); void ps_msub(UGeckoInstruction inst); void ps_mulsX(UGeckoInstruction inst); - void ps_nabs(UGeckoInstruction inst); void ps_nmadd(UGeckoInstruction inst); void ps_nmsub(UGeckoInstruction inst); - void ps_neg(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst); void ps_sum0(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 9d96ed70c9..f1f956db69 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -87,20 +87,43 @@ void JitArm64::fp_logic(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - u32 op10 = inst.SUBOP10; - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); + bool packed = inst.OPCD == 4; - switch (op10) + 
// MR with source === dest => no-op + if (op10 == 72 && b == d) + return; + + if (packed) { - case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; - case 72: m_float_emit.INS(64, VD, 0, VB, 0); break; - case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break; - case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; - default: _assert_msg_(DYNA_REC, 0, "fp_logic WTF!!!"); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.RW(d, REG_REG); + + switch (op10) + { + case 40: m_float_emit.FNEG(64, VD, VB); break; + case 72: m_float_emit.ORR(VD, VB, VB); break; + case 136: m_float_emit.FABS(64, VD, VB); + m_float_emit.FNEG(64, VD, VD); break; + case 264: m_float_emit.FABS(64, VD, VB); break; + default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; + } + } + else + { + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VD = fpr.RW(d); + + switch (op10) + { + case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; + case 72: m_float_emit.INS(64, VD, 0, VB, 0); break; + case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); + m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break; + case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; + default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; + } } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index b815f3e593..8665adb912 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -17,20 +17,6 @@ using namespace Arm64Gen; -void JitArm64::ps_abs(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, 
REG_REG); - - m_float_emit.FABS(64, VD, VB); -} - void JitArm64::ps_madd(UGeckoInstruction inst) { INSTRUCTION_START @@ -123,23 +109,6 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst) } } -void JitArm64::ps_mr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - if (d == b) - return; - - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.ORR(VD, VB, VB); -} - void JitArm64::ps_mulsX(UGeckoInstruction inst) { INSTRUCTION_START @@ -184,35 +153,6 @@ void JitArm64::ps_msub(UGeckoInstruction inst) fpr.Unlock(V0); } -void JitArm64::ps_nabs(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FABS(64, VD, VB); - m_float_emit.FNEG(64, VD, VD); -} - -void JitArm64::ps_neg(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FNEG(64, VD, VB); -} - void JitArm64::ps_nmadd(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 7a109ff14a..7b7c38a481 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -104,11 +104,11 @@ static GekkoOPTemplate table4[] = { //SUBOP10 {0, &JitArm64::FallBackToInterpreter}, // ps_cmpu0 {32, &JitArm64::FallBackToInterpreter}, // ps_cmpo0 - {40, &JitArm64::ps_neg}, // ps_neg - {136, &JitArm64::ps_nabs}, // ps_nabs - {264, &JitArm64::ps_abs}, // ps_abs + {40, &JitArm64::fp_logic}, // ps_neg + {136, &JitArm64::fp_logic}, // ps_nabs + {264, &JitArm64::fp_logic}, // ps_abs {64, 
&JitArm64::FallBackToInterpreter}, // ps_cmpu1 - {72, &JitArm64::ps_mr}, // ps_mr + {72, &JitArm64::fp_logic}, // ps_mr {96, &JitArm64::FallBackToInterpreter}, // ps_cmpo1 {528, &JitArm64::ps_mergeXX}, // ps_merge00 {560, &JitArm64::ps_mergeXX}, // ps_merge01 From 8b32cd073837aa8b84772828dbf9d32d4dbd1c00 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 00:32:53 +0100 Subject: [PATCH 09/10] JitArm64: Merge ps_sumX. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 +- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 136 +++++++----------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 4 +- 3 files changed, 56 insertions(+), 87 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 6c776e2230..dfc2f58fe4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -154,8 +154,7 @@ public: void ps_nmsub(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst); - void ps_sum0(UGeckoInstruction inst); - void ps_sum1(UGeckoInstruction inst); + void ps_sumX(UGeckoInstruction inst); // Loadstore paired void psq_l(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 8665adb912..67d58bde05 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -17,53 +17,6 @@ using namespace Arm64Gen; -void JitArm64::ps_madd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - 
m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_maddsX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - bool upper = inst.SUBOP5 == 15; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - void JitArm64::ps_mergeXX(UGeckoInstruction inst) { INSTRUCTION_START @@ -130,6 +83,52 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst) fpr.FixSinglePrecision(d); fpr.Unlock(V0); } +void JitArm64::ps_madd(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.RW(d, REG_REG); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, VD, V0, VB); + fpr.FixSinglePrecision(d); + + fpr.Unlock(V0); +} + +void JitArm64::ps_maddsX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + + bool upper = inst.SUBOP5 == 15; + + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.RW(d, REG_REG); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VC, upper ? 
1 : 0); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + fpr.FixSinglePrecision(d); + + fpr.Unlock(V0); +} void JitArm64::ps_msub(UGeckoInstruction inst) { @@ -243,7 +242,7 @@ void JitArm64::ps_sel(UGeckoInstruction inst) } } -void JitArm64::ps_sum0(UGeckoInstruction inst) +void JitArm64::ps_sumX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); @@ -252,36 +251,7 @@ void JitArm64::ps_sum0(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VB, 1); - if (d != c) - { - m_float_emit.FADD(64, VD, V0, VA); - m_float_emit.INS(64, VD, 1, VC, 1); - } - else - { - m_float_emit.FADD(64, V0, V0, VA); - m_float_emit.INS(64, VD, 0, V0, 0); - } - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_sum1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + bool upper = inst.SUBOP5 == 11; ARM64Reg VA = fpr.R(a, REG_REG); ARM64Reg VB = fpr.R(b, REG_REG); @@ -289,16 +259,16 @@ void JitArm64::ps_sum1(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.DUP(64, V0, VA, 0); + m_float_emit.DUP(64, V0, upper ? VA : VB, upper ? 0 : 1); if (d != c) { - m_float_emit.FADD(64, VD, V0, VB); - m_float_emit.INS(64, VD, 0, VC, 0); + m_float_emit.FADD(64, VD, V0, upper ? VB : VA); + m_float_emit.INS(64, VD, upper ? 0 : 1, VC, upper ? 0 : 1); } else { - m_float_emit.FADD(64, V0, V0, VB); - m_float_emit.INS(64, VD, 1, V0, 1); + m_float_emit.FADD(64, V0, V0, upper ? VB : VA); + m_float_emit.INS(64, VD, upper ? 1 : 0, V0, upper ? 
1 : 0); } fpr.FixSinglePrecision(d); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 7b7c38a481..eb6722a474 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -120,8 +120,8 @@ static GekkoOPTemplate table4[] = static GekkoOPTemplate table4_2[] = { - {10, &JitArm64::ps_sum0}, // ps_sum0 - {11, &JitArm64::ps_sum1}, // ps_sum1 + {10, &JitArm64::ps_sumX}, // ps_sum0 + {11, &JitArm64::ps_sumX}, // ps_sum1 {12, &JitArm64::ps_mulsX}, // ps_muls0 {13, &JitArm64::ps_mulsX}, // ps_muls1 {14, &JitArm64::ps_maddsX}, // ps_madds0 From f259a8d6cf8e496cb1c676404242a0790409b9fb Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 00:50:13 +0100 Subject: [PATCH 10/10] JitArm64: Merge more stuff into ps_maddXX. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 6 +- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 133 +++++------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 12 +- 3 files changed, 44 insertions(+), 107 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index dfc2f58fe4..a15712d36f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -145,13 +145,9 @@ public: void fctiwzx(UGeckoInstruction inst); // Paired - void ps_madd(UGeckoInstruction inst); - void ps_maddsX(UGeckoInstruction inst); + void ps_maddXX(UGeckoInstruction inst); void ps_mergeXX(UGeckoInstruction inst); - void ps_msub(UGeckoInstruction inst); void ps_mulsX(UGeckoInstruction inst); - void ps_nmadd(UGeckoInstruction inst); - void ps_nmsub(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst); void ps_sumX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 67d58bde05..c17c22b549 100644 
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -83,7 +83,8 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst) fpr.FixSinglePrecision(d); fpr.Unlock(V0); } -void JitArm64::ps_madd(UGeckoInstruction inst) + +void JitArm64::ps_maddXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); @@ -91,6 +92,7 @@ void JitArm64::ps_madd(UGeckoInstruction inst) FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + u32 op5 = inst.SUBOP5; ARM64Reg VA = fpr.R(a, REG_REG); ARM64Reg VB = fpr.R(b, REG_REG); @@ -98,101 +100,40 @@ void JitArm64::ps_madd(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_maddsX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - bool upper = inst.SUBOP5 == 15; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VC, upper ? 
1 : 0); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_msub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_nmadd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_nmsub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); + switch (op5) + { + case 14: // ps_madds0 + m_float_emit.DUP(64, V0, VC, 0); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + break; + case 15: // ps_madds1 + 
m_float_emit.DUP(64, V0, VC, 1); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + break; + case 28: // ps_msub + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, VD, V0, VB); + break; + case 29: // ps_madd + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, VD, V0, VB); + break; + case 30: // ps_nmsub + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, VD, V0, VB); + m_float_emit.FNEG(64, VD, VD); + break; + case 31: // ps_nmadd + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.FNEG(64, VD, VD); + break; + default: + _assert_msg_(DYNA_REC, 0, "ps_madd - invalid op"); + break; + } fpr.FixSinglePrecision(d); fpr.Unlock(V0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index eb6722a474..700cf42df9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -124,8 +124,8 @@ static GekkoOPTemplate table4_2[] = {11, &JitArm64::ps_sumX}, // ps_sum1 {12, &JitArm64::ps_mulsX}, // ps_muls0 {13, &JitArm64::ps_mulsX}, // ps_muls1 - {14, &JitArm64::ps_maddsX}, // ps_madds0 - {15, &JitArm64::ps_maddsX}, // ps_madds1 + {14, &JitArm64::ps_maddXX}, // ps_madds0 + {15, &JitArm64::ps_maddXX}, // ps_madds1 {18, &JitArm64::fp_arith}, // ps_div {20, &JitArm64::fp_arith}, // ps_sub {21, &JitArm64::fp_arith}, // ps_add @@ -133,10 +133,10 @@ static GekkoOPTemplate table4_2[] = {24, &JitArm64::ps_res}, // ps_res {25, &JitArm64::fp_arith}, // ps_mul {26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte - {28, &JitArm64::ps_msub}, // ps_msub - {29, &JitArm64::ps_madd}, // ps_madd - {30, &JitArm64::ps_nmsub}, // ps_nmsub - {31, &JitArm64::ps_nmadd}, // ps_nmadd + {28, &JitArm64::ps_maddXX}, // ps_msub + {29, &JitArm64::ps_maddXX}, // ps_madd + {30, &JitArm64::ps_maddXX}, // ps_nmsub + {31, &JitArm64::ps_maddXX}, // ps_nmadd };