From 8b32cd073837aa8b84772828dbf9d32d4dbd1c00 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 00:32:53 +0100 Subject: [PATCH] JitArm64: Merge ps_sumX. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 +- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 136 +++++++----------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 4 +- 3 files changed, 56 insertions(+), 87 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 6c776e2230..dfc2f58fe4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -154,8 +154,7 @@ public: void ps_nmsub(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst); - void ps_sum0(UGeckoInstruction inst); - void ps_sum1(UGeckoInstruction inst); + void ps_sumX(UGeckoInstruction inst); // Loadstore paired void psq_l(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 8665adb912..67d58bde05 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -17,53 +17,6 @@ using namespace Arm64Gen; -void JitArm64::ps_madd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_maddsX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - bool upper = inst.SUBOP5 == 15; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - void JitArm64::ps_mergeXX(UGeckoInstruction inst) { INSTRUCTION_START @@ -130,6 +83,52 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst) fpr.FixSinglePrecision(d); fpr.Unlock(V0); } +void JitArm64::ps_madd(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.RW(d, REG_REG); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, VD, V0, VB); + fpr.FixSinglePrecision(d); + + fpr.Unlock(V0); +} + +void JitArm64::ps_maddsX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITPairedOff); + FALLBACK_IF(inst.Rc); + FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); + + u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + + bool upper = inst.SUBOP5 == 15; + + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VC = fpr.R(c, REG_REG); + ARM64Reg VD = fpr.RW(d, REG_REG); + ARM64Reg V0 = fpr.GetReg(); + + m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + fpr.FixSinglePrecision(d); + + fpr.Unlock(V0); +} void JitArm64::ps_msub(UGeckoInstruction inst) { @@ -243,7 +242,7 @@ void JitArm64::ps_sel(UGeckoInstruction inst) } } -void JitArm64::ps_sum0(UGeckoInstruction inst) +void JitArm64::ps_sumX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); @@ -252,36 +251,7 @@ void JitArm64::ps_sum0(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VB, 1); - if (d != c) - { - m_float_emit.FADD(64, VD, V0, VA); - m_float_emit.INS(64, VD, 1, VC, 1); - } - else - { - m_float_emit.FADD(64, V0, V0, VA); - m_float_emit.INS(64, VD, 0, V0, 0); - } - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_sum1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + bool upper = inst.SUBOP5 == 11; ARM64Reg VA = fpr.R(a, REG_REG); ARM64Reg VB = fpr.R(b, REG_REG); @@ -289,16 +259,16 @@ void JitArm64::ps_sum1(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.DUP(64, V0, VA, 0); + m_float_emit.DUP(64, V0, upper ? VA : VB, upper ? 0 : 1); if (d != c) { - m_float_emit.FADD(64, VD, V0, VB); - m_float_emit.INS(64, VD, 0, VC, 0); + m_float_emit.FADD(64, VD, V0, upper ? VB : VA); + m_float_emit.INS(64, VD, upper ? 0 : 1, VC, upper ? 0 : 1); } else { - m_float_emit.FADD(64, V0, V0, VB); - m_float_emit.INS(64, VD, 1, V0, 1); + m_float_emit.FADD(64, V0, V0, upper ? VB : VA); + m_float_emit.INS(64, VD, upper ? 1 : 0, V0, upper ? 1 : 0); } fpr.FixSinglePrecision(d); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 7b7c38a481..eb6722a474 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -120,8 +120,8 @@ static GekkoOPTemplate table4[] = static GekkoOPTemplate table4_2[] = { - {10, &JitArm64::ps_sum0}, // ps_sum0 - {11, &JitArm64::ps_sum1}, // ps_sum1 + {10, &JitArm64::ps_sumX}, // ps_sum0 + {11, &JitArm64::ps_sumX}, // ps_sum1 {12, &JitArm64::ps_mulsX}, // ps_muls0 {13, &JitArm64::ps_mulsX}, // ps_muls1 {14, &JitArm64::ps_maddsX}, // ps_madds0