From 1e59dc10255d47ed418ab66f5df816b2fb214c58 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 09:04:05 +0100 Subject: [PATCH 01/19] JitArm64: Single precision tracking. --- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 195 +++++++++--------- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 20 ++ 2 files changed, 119 insertions(+), 96 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index f08f7605e3..29df1314fe 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -307,12 +307,26 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) OpArg& reg = m_guest_registers[preg]; IncrementAllUsed(); reg.ResetLastUsed(); + ARM64Reg host_reg = reg.GetReg(); switch (reg.GetType()) { + case REG_REG_SINGLE: + { + // We're asked for singles, so just return the register. + if (type == REG_REG_SINGLE || type == REG_IS_LOADED_SINGLE) + return host_reg; + + // Else convert this register back to doubles. 
+ m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + reg.LoadToReg(host_reg); + + // fall through + } case REG_REG: // already in a reg - return reg.GetReg(); - break; + { + return host_reg; + } case REG_LOWER_PAIR: { if (type == REG_REG) @@ -320,18 +334,34 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) // Load the high 64bits from the file and insert them in to the high 64bits of the host register ARM64Reg tmp_reg = GetReg(); m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, X29, PPCSTATE_OFF(ps[preg][1])); - m_float_emit->INS(64, reg.GetReg(), 1, tmp_reg, 0); + m_float_emit->INS(64, host_reg, 1, tmp_reg, 0); UnlockRegister(tmp_reg); // Change it over to a full 128bit register - reg.LoadToReg(reg.GetReg()); + reg.LoadToReg(host_reg); } - return reg.GetReg(); + return host_reg; + } + case REG_DUP_SINGLE: + { + if (type == REG_IS_LOADED_SINGLE) + return host_reg; + + if (type == REG_REG_SINGLE) + { + // Duplicate to the top and change over + m_float_emit->INS(32, host_reg, 1, host_reg, 0); + reg.LoadToRegSingle(host_reg); + return host_reg; + } + + m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + reg.LoadDup(host_reg); + + // fall through } - break; case REG_DUP: { - ARM64Reg host_reg = reg.GetReg(); if (type == REG_REG) { // We are requesting a full 128bit register @@ -342,10 +372,9 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) } return host_reg; } - break; case REG_NOTLOADED: // Register isn't loaded at /all/ { - ARM64Reg host_reg = GetReg(); + host_reg = GetReg(); u32 load_size; if (type == REG_REG) { @@ -361,7 +390,6 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); return host_reg; } - break; default: _dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!"); break; @@ -380,90 +408,43 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type) reg.ResetLastUsed(); reg.SetDirty(true); - switch 
(reg.GetType()) + + // If not loaded at all, just alloc a new one. + if (reg.GetType() == REG_NOTLOADED) { - case REG_NOTLOADED: - { - ARM64Reg host_reg = GetReg(); - if (type == REG_LOWER_PAIR) - { - reg.LoadLowerReg(host_reg); - } - else if (type == REG_DUP) - { - reg.LoadDup(host_reg); - } - else - { - reg.LoadToReg(host_reg); - } + reg.Load(GetReg(), type); + return reg.GetReg(); } - break; - case REG_LOWER_PAIR: + + // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty. + if (type == REG_LOWER_PAIR && was_dirty) { ARM64Reg host_reg = reg.GetReg(); - if (type == REG_REG) + + switch (reg.GetType()) { - // Change it over to a full 128bit register - reg.LoadToReg(host_reg); - } - else if (type == REG_DUP) - { - // Register is already the lower pair - // Just convert it over to a dup - reg.LoadDup(host_reg); - } - } - break; - case REG_REG: - { - ARM64Reg host_reg = reg.GetReg(); - if (type == REG_LOWER_PAIR) - { - // If we only want the lower bits, let's store away the high bits and drop to a lower only register + case REG_REG_SINGLE: + m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + // fall through + case REG_REG: // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. - if (was_dirty) - m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); - reg.LoadLowerReg(host_reg); + m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + break; + case REG_DUP_SINGLE: + m_float_emit->FCVT(64, 32, EncodeRegToDouble(reg.GetReg()), EncodeRegToDouble(reg.GetReg())); + // fall through + case REG_DUP: + // Store PSR1 (which is equal to PSR0) in memory. + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); + break; + default: + // All other types doesn't store anything in PSR1. 
+ break; } - else if (type == REG_DUP) - { - // If we are going from a full 128bit register to a duplicate - // then we can just change over - reg.LoadDup(host_reg); - } - } - break; - case REG_DUP: - { - ARM64Reg host_reg = reg.GetReg(); - if (type == REG_REG) - { - // We are a duplicated register going to a full 128bit register - // Do an insert of our lower 64bits to the higher 64bits - m_float_emit->INS(64, host_reg, 1, host_reg, 0); - - // Change over to the full 128bit register - reg.LoadToReg(host_reg); - } - else if (type == REG_LOWER_PAIR) - { - // We are duplicated changing over to a lower register - // We've got to be careful in this instance and do a store of our lower 64bits - // to the upper 64bits in the PowerPC state - // That way incase if we hit the path of DUP->LOWER->REG we get the correct bits back - if (was_dirty) - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); - reg.LoadLowerReg(host_reg); - } - } - break; - default: - // Do nothing - break; } + reg.Load(reg.GetReg(), type); return reg.GetReg(); } @@ -510,17 +491,33 @@ bool Arm64FPRCache::IsCalleeSaved(ARM64Reg reg) void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) { OpArg& reg = m_guest_registers[preg]; - if (reg.GetType() == REG_REG || - reg.GetType() == REG_LOWER_PAIR) + ARM64Reg host_reg = reg.GetReg(); + RegType type = reg.GetType(); + bool dirty = reg.IsDirty(); + + // If we're in single mode, just convert it back to a double. 
+ if (type == REG_REG_SINGLE) + { + if (dirty) + m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + type = REG_REG; + } + if (type == REG_DUP_SINGLE) + { + if (dirty) + m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + type = REG_DUP; + } + + if (type == REG_REG || type == REG_LOWER_PAIR) { - ARM64Reg host_reg = reg.GetReg(); u32 store_size; - if (reg.GetType() == REG_REG) + if (type == REG_REG) store_size = 128; else store_size = 64; - if (reg.IsDirty()) + if (dirty) m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); if (!maintain_state) @@ -529,10 +526,9 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) reg.Flush(); } } - else if (reg.GetType() == REG_DUP) + else if (type == REG_DUP) { - ARM64Reg host_reg = reg.GetReg(); - if (reg.IsDirty()) + if (dirty) { // If the paired registers were at the start of ppcState we could do an STP here. // Too bad moving them would break savestate compatibility between x86_64 and AArch64 @@ -564,18 +560,25 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() return registers; } +bool Arm64FPRCache::IsSingle(u32 preg) +{ + RegType type = m_guest_registers[preg].GetType(); + return type == REG_REG_SINGLE || type == REG_DUP_SINGLE; +} + void Arm64FPRCache::FixSinglePrecision(u32 preg) { - ARM64Reg host_reg = m_guest_registers[preg].GetReg(); - switch (m_guest_registers[preg].GetType()) + OpArg& reg = m_guest_registers[preg]; + ARM64Reg host_reg = reg.GetReg(); + switch (reg.GetType()) { case REG_DUP: // only PS0 needs to be converted m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + reg.LoadDupSingle(host_reg); break; case REG_REG: // PS0 and PS1 needs to be converted m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - m_float_emit->FCVTL(64, 
EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + reg.LoadToRegSingle(host_reg); break; default: break; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 0d2d1808c4..dcf33a0bb5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -23,6 +23,9 @@ enum RegType REG_LOWER_PAIR, // Only the lower pair of a paired register REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value) REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded + REG_REG_SINGLE, // Both registers are loaded as single + REG_DUP_SINGLE, // The lower one contains both registers, as single + REG_IS_LOADED_SINGLE, // We only want to access the lower one as single }; enum FlushMode @@ -56,11 +59,21 @@ public: { return m_value; } + void Load(ARM64Reg reg, RegType type) + { + m_type = type; + m_reg = reg; + } void LoadToReg(ARM64Reg reg) { m_type = REG_REG; m_reg = reg; } + void LoadToRegSingle(ARM64Reg reg) + { + m_type = REG_REG_SINGLE; + m_reg = reg; + } void LoadLowerReg(ARM64Reg reg) { m_type = REG_LOWER_PAIR; @@ -71,6 +84,11 @@ public: m_type = REG_DUP; m_reg = reg; } + void LoadDupSingle(ARM64Reg reg) + { + m_type = REG_DUP_SINGLE; + m_reg = reg; + } void LoadToImm(u32 imm) { m_type = REG_IMM; @@ -278,6 +296,8 @@ public: BitSet32 GetCallerSavedUsed() override; + bool IsSingle(u32 preg); + void FixSinglePrecision(u32 preg); protected: From ae1b5ff9e8b7544ec81427142a1a64b49dcc9c24 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 09:45:39 +0100 Subject: [PATCH 02/19] JitArm64: Track single precision in load/store. 
--- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 2 -- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 2 +- .../JitArm64/JitArm64_LoadStorePaired.cpp | 18 ++++++------------ 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 6f0d296a7f..0a69bf2c9c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -86,7 +86,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, { m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr); m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); - m_float_emit.FCVT(64, 32, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); } else { @@ -214,7 +213,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, MOVI2R(X30, (u64)&PowerPC::Read_U32); BLR(X30); m_float_emit.INS(32, RS, 0, X0); - m_float_emit.FCVT(64, 32, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); } else { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 979296474f..7dabbc8600 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -76,7 +76,7 @@ void JitArm64::lfXX(UGeckoInstruction inst) u32 imm_addr = 0; bool is_immediate = false; - RegType type = !!(flags & BackPatchInfo::FLAG_SIZE_F64) ? REG_LOWER_PAIR : REG_DUP; + RegType type = !!(flags & BackPatchInfo::FLAG_SIZE_F64) ? 
REG_LOWER_PAIR : REG_DUP_SINGLE; gpr.Lock(W0, W30); fpr.Lock(Q0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 0f2e51f616..9878d98511 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -62,20 +62,17 @@ void JitArm64::psq_l(UGeckoInstruction inst) if (js.assumeNoPairedQuantize) { - VS = fpr.RW(inst.RS, REG_REG); + VS = fpr.RW(inst.RS, REG_REG_SINGLE); if (!inst.W) { ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), X28); m_float_emit.LD1(32, 1, EncodeRegToDouble(VS), EncodeRegTo64(addr_reg)); - m_float_emit.REV32(8, VS, VS); - m_float_emit.FCVTL(64, VS, VS); } else { m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), X28); - m_float_emit.REV32(8, VS, VS); - m_float_emit.FCVT(64, 32, EncodeRegToDouble(VS), EncodeRegToDouble(VS)); } + m_float_emit.REV32(8, EncodeRegToDouble(VS), EncodeRegToDouble(VS)); } else { @@ -87,17 +84,14 @@ void JitArm64::psq_l(UGeckoInstruction inst) LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true)); BLR(X30); - VS = fpr.RW(inst.RS, REG_REG); - if (!inst.W) - m_float_emit.FCVTL(64, VS, D0); - else - m_float_emit.FCVT(64, 32, EncodeRegToDouble(VS), D0); + VS = fpr.RW(inst.RS, REG_REG_SINGLE); + m_float_emit.ORR(EncodeRegToDouble(VS), D0, D0); } if (inst.W) { - m_float_emit.FMOV(D0, 0x70); // 1.0 as a Double - m_float_emit.INS(64, VS, 1, Q0, 0); + m_float_emit.FMOV(S0, 0x70); // 1.0 as a Single + m_float_emit.INS(32, VS, 1, Q0, 0); } gpr.Unlock(W0, W1, W2, W30); From 003288a886698dbc6486e0da1cbbc36e675b187a Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 10:19:21 +0100 Subject: [PATCH 03/19] JitArm64: Track singles in fp_arith. 
--- .../JitArm64/JitArm64_FloatingPoint.cpp | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index f1f956db69..73031b0df6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -33,34 +33,44 @@ void JitArm64::fp_arith(UGeckoInstruction inst) bool use_c = op5 >= 25; // fmul and all kind of fmaddXX bool use_b = op5 != 25; // fmul uses no B + bool inputs_are_singles = fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c)); + ARM64Reg VA, VB, VC, VD; if (packed) { - VA = fpr.R(a, REG_REG); + RegType type = inputs_are_singles ? REG_REG_SINGLE : REG_REG; + u8 size = inputs_are_singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = inputs_are_singles ? EncodeRegToDouble : EncodeRegToQuad; + + VA = reg_encoder(fpr.R(a, type)); if (use_b) - VB = fpr.R(b, REG_REG); + VB = reg_encoder(fpr.R(b, type)); if (use_c) - VC = fpr.R(c, REG_REG); - VD = fpr.RW(d, REG_REG); + VC = reg_encoder(fpr.R(c, type)); + VD = reg_encoder(fpr.RW(d, type)); switch (op5) { - case 18: m_float_emit.FDIV(64, VD, VA, VB); break; - case 20: m_float_emit.FSUB(64, VD, VA, VB); break; - case 21: m_float_emit.FADD(64, VD, VA, VB); break; - case 25: m_float_emit.FMUL(64, VD, VA, VC); break; + case 18: m_float_emit.FDIV(size, VD, VA, VB); break; + case 20: m_float_emit.FSUB(size, VD, VA, VB); break; + case 21: m_float_emit.FADD(size, VD, VA, VB); break; + case 25: m_float_emit.FMUL(size, VD, VA, VC); break; default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break; } } else { - VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); + RegType type = (inputs_are_singles && single) ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; + RegType type_out = single ? (inputs_are_singles ? 
REG_DUP_SINGLE : REG_DUP) : REG_LOWER_PAIR; + ARM64Reg (*reg_encoder)(ARM64Reg) = (inputs_are_singles && single) ? EncodeRegToSingle : EncodeRegToDouble; + + VA = reg_encoder(fpr.R(a, type)); if (use_b) - VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); + VB = reg_encoder(fpr.R(b, type)); if (use_c) - VC = EncodeRegToDouble(fpr.R(c, REG_IS_LOADED)); - VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR)); + VC = reg_encoder(fpr.R(c, type)); + VD = reg_encoder(fpr.RW(d, type_out)); switch (op5) { From 0efdd5cacd4277e5ec5a3d43ab7ce15ab477922f Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 11:02:25 +0100 Subject: [PATCH 04/19] JitArm64: Track singles in paired fp_logic. --- .../JitArm64/JitArm64_FloatingPoint.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 73031b0df6..3d11e37814 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -105,18 +105,24 @@ void JitArm64::fp_logic(UGeckoInstruction inst) if (op10 == 72 && b == d) return; + bool is_single = fpr.IsSingle(b); + if (packed) { - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + RegType type = is_single ? REG_REG_SINGLE : REG_REG; + u8 size = is_single ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = is_single ? 
EncodeRegToDouble : EncodeRegToQuad; + + ARM64Reg VB = reg_encoder(fpr.R(b, type)); + ARM64Reg VD = reg_encoder(fpr.RW(d, type)); switch (op10) { - case 40: m_float_emit.FNEG(64, VD, VB); break; + case 40: m_float_emit.FNEG(size, VD, VB); break; case 72: m_float_emit.ORR(VD, VB, VB); break; - case 136: m_float_emit.FABS(64, VD, VB); - m_float_emit.FNEG(64, VD, VD); break; - case 264: m_float_emit.FABS(64, VD, VB); break; + case 136: m_float_emit.FABS(size, VD, VB); + m_float_emit.FNEG(size, VD, VD); break; + case 264: m_float_emit.FABS(size, VD, VB); break; default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; } } From 55062951236723d8d72504cb487df7101d55f9bd Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 11:14:12 +0100 Subject: [PATCH 05/19] JitArm64: Track singles in frspx. --- .../JitArm64/JitArm64_FloatingPoint.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 3d11e37814..40212a35bc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -169,11 +169,22 @@ void JitArm64::frspx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d, REG_DUP); + if (fpr.IsSingle(b)) + { + // Source is already in single precision, so no need to do anything but to copy to PSR1. 
+ ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE); + ARM64Reg VD = fpr.RW(d, REG_DUP_SINGLE); - m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); - m_float_emit.FCVT(64, 32, EncodeRegToDouble(VD), EncodeRegToDouble(VD)); + if (b != d) + m_float_emit.FMOV(EncodeRegToSingle(VD), EncodeRegToSingle(VB)); + } + else + { + ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VD = fpr.RW(d, REG_DUP_SINGLE); + + m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); + } } void JitArm64::fcmpX(UGeckoInstruction inst) From 6572790d8b2e2f82e1cb0d20bbeea60cfb8fbd13 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 11:19:19 +0100 Subject: [PATCH 06/19] JitArm64: Track singles in ps_mergeXX. --- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index c17c22b549..04e9d2ed76 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -25,36 +25,41 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + bool singles = fpr.IsSingle(a) && fpr.IsSingle(b); + RegType type = singles ? REG_REG_SINGLE : REG_REG; + u8 size = singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? 
EncodeRegToDouble : EncodeRegToQuad; + + ARM64Reg VA = fpr.R(a, type); + ARM64Reg VB = fpr.R(b, type); + ARM64Reg VD = fpr.RW(d, type); switch (inst.SUBOP10) { case 528: //00 - m_float_emit.TRN1(64, VD, VA, VB); + m_float_emit.TRN1(size, VD, VA, VB); break; case 560: //01 - m_float_emit.INS(64, VD, 0, VA, 0); - m_float_emit.INS(64, VD, 1, VB, 1); + m_float_emit.INS(size, VD, 0, VA, 0); + m_float_emit.INS(size, VD, 1, VB, 1); break; case 592: //10 if (d != a && d != b) { - m_float_emit.INS(64, VD, 0, VA, 1); - m_float_emit.INS(64, VD, 1, VB, 0); + m_float_emit.INS(size, VD, 0, VA, 1); + m_float_emit.INS(size, VD, 1, VB, 0); } else { ARM64Reg V0 = fpr.GetReg(); - m_float_emit.INS(64, V0, 0, VA, 1); - m_float_emit.INS(64, V0, 1, VB, 0); - m_float_emit.ORR(VD, V0, V0); + m_float_emit.INS(size, V0, 0, VA, 1); + m_float_emit.INS(size, V0, 1, VB, 0); + m_float_emit.ORR(reg_encoder(VD), reg_encoder(V0), reg_encoder(V0)); fpr.Unlock(V0); } break; case 624: //11 - m_float_emit.TRN2(64, VD, VA, VB); + m_float_emit.TRN2(size, VD, VA, VB); break; default: _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); From d185d2f7044d59be6356606fdd4620286f32dc28 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 11:20:45 +0100 Subject: [PATCH 07/19] JitArm64: Track singles in ps_mulsX. --- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 04e9d2ed76..cf7360ab4f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -78,13 +78,19 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst) bool upper = inst.SUBOP5 == 13; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + bool singles = fpr.IsSingle(a) && fpr.IsSingle(c); + RegType type = singles ? 
REG_REG_SINGLE : REG_REG; + u8 size = singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad; + + ARM64Reg VA = fpr.R(a, type); + ARM64Reg VC = fpr.R(c, type); + ARM64Reg VD = fpr.RW(d, type); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); - m_float_emit.FMUL(64, VD, VA, V0); + m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(VC), upper ? 1 : 0); + m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(V0)); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } From ca091b9e92bd885a643f268f209e1d52ffda989f Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 11:22:11 +0100 Subject: [PATCH 08/19] JitArm64: Track single precision in ps_res. --- .../Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index cf7360ab4f..d79a2e11b1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -159,10 +159,16 @@ void JitArm64::ps_res(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + bool singles = fpr.IsSingle(b); + RegType type = singles ? REG_REG_SINGLE : REG_REG; + u8 size = singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad; + + ARM64Reg VB = fpr.R(b, type); + ARM64Reg VD = fpr.RW(d, type); + + m_float_emit.FRECPE(size, reg_encoder(VD), reg_encoder(VB)); /* ps_res is a reciprocal estimate (1/x); FRSQRTE would compute 1/sqrt(x) */ - m_float_emit.FRSQRTE(64, VD, VB); fpr.FixSinglePrecision(d); } From 7fd68c8761511a5be640a1d3bc32624ad5df205e Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 11:25:03 +0100 Subject: [PATCH 09/19] JitArm64: Track singles in ps_sumX. 
--- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index d79a2e11b1..a78ee6896a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -211,23 +211,29 @@ void JitArm64::ps_sumX(UGeckoInstruction inst) bool upper = inst.SUBOP5 == 11; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c); + RegType type = singles ? REG_REG_SINGLE : REG_REG; + u8 size = singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad; + + ARM64Reg VA = fpr.R(a, type); + ARM64Reg VB = fpr.R(b, type); + ARM64Reg VC = fpr.R(c, type); + ARM64Reg VD = fpr.RW(d, type); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.DUP(64, V0, upper ? VA : VB, upper ? 0 : 1); + m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(upper ? VA : VB), upper ? 0 : 1); if (d != c) { - m_float_emit.FADD(64, VD, V0, upper ? VB : VA); - m_float_emit.INS(64, VD, upper ? 0 : 1, VC, upper ? 0 : 1); + m_float_emit.FADD(size, reg_encoder(VD), reg_encoder(V0), reg_encoder(upper ? VB : VA)); + m_float_emit.INS(size, VD, upper ? 0 : 1, VC, upper ? 0 : 1); } else { - m_float_emit.FADD(64, V0, V0, upper ? VB : VA); - m_float_emit.INS(64, VD, upper ? 1 : 0, V0, upper ? 1 : 0); + m_float_emit.FADD(size, reg_encoder(V0), reg_encoder(V0), reg_encoder(upper ? VB : VA)); + m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0); } + fpr.FixSinglePrecision(d); fpr.Unlock(V0); From c8948ff8c7a8ab015ab361e8c2d60753ca510a3d Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 11:51:48 +0100 Subject: [PATCH 10/19] JitArm64: Track single precision in ps_maddXX. 
--- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 52 +++++++++++-------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index a78ee6896a..d6f4568100 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -105,41 +105,49 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; u32 op5 = inst.SUBOP5; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); + bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c); + RegType type = singles ? REG_REG_SINGLE : REG_REG; + u8 size = singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad; + + ARM64Reg VA = reg_encoder(fpr.R(a, type)); + ARM64Reg VB = reg_encoder(fpr.R(b, type)); + ARM64Reg VC = reg_encoder(fpr.R(c, type)); + ARM64Reg VD = reg_encoder(fpr.RW(d, type)); + ARM64Reg V0Q = fpr.GetReg(); + ARM64Reg V0 = reg_encoder(V0Q); + + // TODO: Do FMUL and FADD/FSUB in *one* host call to save accuracy. 
switch (op5) { case 14: // ps_madds0 - m_float_emit.DUP(64, V0, VC, 0); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.DUP(size, V0, VC, 0); + m_float_emit.FMUL(size, V0, V0, VA); + m_float_emit.FADD(size, VD, V0, VB); break; case 15: // ps_madds1 - m_float_emit.DUP(64, V0, VC, 1); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.DUP(size, V0, VC, 1); + m_float_emit.FMUL(size, V0, V0, VA); + m_float_emit.FADD(size, VD, V0, VB); break; case 28: // ps_msub - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FSUB(size, VD, V0, VB); break; case 29: // ps_madd - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FADD(size, VD, V0, VB); break; case 30: // ps_nmsub - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FSUB(size, VD, V0, VB); + m_float_emit.FNEG(size, VD, VD); break; case 31: // ps_nmadd - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); + m_float_emit.FMUL(size, V0, VA, VC); + m_float_emit.FADD(size, VD, V0, VB); + m_float_emit.FNEG(size, VD, VD); break; default: _assert_msg_(DYNA_REC, 0, "ps_madd - invalid op"); @@ -147,7 +155,7 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst) } fpr.FixSinglePrecision(d); - fpr.Unlock(V0); + fpr.Unlock(V0Q); } void JitArm64::ps_res(UGeckoInstruction inst) From 5fad3d94a0930c3db2137c68fdd712a756036362 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Feb 2016 17:17:19 +0100 Subject: [PATCH 11/19] JitArm64: Track singles in fcmpX. 
--- .../Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 40212a35bc..e6b512cf2b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -196,8 +196,12 @@ void JitArm64::fcmpX(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB; int crf = inst.CRFD; - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + bool singles = fpr.IsSingle(a) && fpr.IsSingle(b); + RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToSingle : EncodeRegToDouble; + + ARM64Reg VA = reg_encoder(fpr.R(a, type)); + ARM64Reg VB = reg_encoder(fpr.R(b, type)); ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -206,7 +210,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) FixupBranch continue1, continue2, continue3; ORR(XA, ZR, 32, 0, true); - m_float_emit.FCMP(EncodeRegToDouble(VA), EncodeRegToDouble(VB)); + m_float_emit.FCMP(VA, VB); if (a != b) { From fe87462be8d5e6a9ea7aa3b53605ddced2f37bbd Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 12 Feb 2016 11:07:28 +0100 Subject: [PATCH 12/19] JitArm64: Track singles in stfs. 
--- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 7dabbc8600..5fe4628a8d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -270,7 +270,16 @@ void JitArm64::stfXX(UGeckoInstruction inst) gpr.Lock(W0, W1, W30); fpr.Lock(Q0); - ARM64Reg V0 = fpr.R(inst.FS, REG_IS_LOADED); + bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS); + + ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED); + + if (single) + { + flags &= ~BackPatchInfo::FLAG_SIZE_F32; + flags |= BackPatchInfo::FLAG_SIZE_F32I; + } + ARM64Reg addr_reg = W1; if (update) @@ -407,24 +416,29 @@ void JitArm64::stfXX(UGeckoInstruction inst) ADD(X1, X30, pipe_off); LDR(INDEX_UNSIGNED, W0, X30, count_off); - if (accessSize == 64) + if (flags & BackPatchInfo::FLAG_SIZE_F64) { m_float_emit.REV64(8, Q0, V0); - if (pipe_off) - m_float_emit.STR(64, Q0, X1, ArithOption(X0)); - else - m_float_emit.STR(64, Q0, X30, ArithOption(X0)); } - else if (accessSize == 32) + else if (flags & BackPatchInfo::FLAG_SIZE_F32) { m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0)); m_float_emit.REV32(8, D0, D0); - if (pipe_off) - m_float_emit.STR(32, D0, X1, ArithOption(X0)); - else - m_float_emit.STR(32, D0, X30, ArithOption(X0)); - } + else if (flags & BackPatchInfo::FLAG_SIZE_F32I) + { + m_float_emit.REV32(8, D0, V0); + } + + if (pipe_off) + { + m_float_emit.STR(accessSize, accessSize == 64 ? Q0 : D0, X1, ArithOption(X0)); + } + else + { + m_float_emit.STR(accessSize, accessSize == 64 ? 
Q0 : D0, X30, ArithOption(X0)); + } + ADD(W0, W0, accessSize >> 3); STR(INDEX_UNSIGNED, W0, X30, count_off); js.fifoBytesThisBlock += accessSize >> 3; From 84395b65f6f1c4e8f9579aab63b5c8c0584dab1f Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 12 Feb 2016 11:22:44 +0100 Subject: [PATCH 13/19] JitArm64: Track singles in psq_st. --- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 12 +++++++ .../JitArm64/JitArm64_LoadStorePaired.cpp | 24 ++++++++++--- .../Core/PowerPC/JitArmCommon/BackPatch.h | 34 ++++++++++--------- 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 0a69bf2c9c..5f4421ee93 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -73,6 +73,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, m_float_emit.REV32(8, D0, D0); m_float_emit.STR(64, Q0, X28, addr); } + else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I) + { + m_float_emit.REV32(8, D0, RS); + m_float_emit.STR(64, Q0, X28, addr); + } else { m_float_emit.REV64(8, Q0, RS); @@ -197,6 +202,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, MOVI2R(X30, (u64)PowerPC::Write_U64); BLR(X30); } + else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I) + { + m_float_emit.UMOV(64, X0, RS, 0); + ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32)); + MOVI2R(X30, (u64)PowerPC::Write_U64); + BLR(X30); + } else { MOVI2R(X30, (u64)&PowerPC::Write_U64); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 9878d98511..dde52cfad5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -115,8 +115,10 @@ void JitArm64::psq_st(UGeckoInstruction inst) gpr.Lock(W0, W1, W2, W30); 
fpr.Lock(Q0, Q1); + bool single = fpr.IsSingle(inst.RS); + ARM64Reg arm_addr = gpr.R(inst.RA); - ARM64Reg VS = fpr.R(inst.RS, REG_REG); + ARM64Reg VS = fpr.R(inst.RS, single ? REG_REG_SINGLE : REG_REG); ARM64Reg scale_reg = W0; ARM64Reg addr_reg = W1; @@ -150,7 +152,12 @@ void JitArm64::psq_st(UGeckoInstruction inst) if (js.assumeNoPairedQuantize) { u32 flags = BackPatchInfo::FLAG_STORE; - flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2); + + if (single) + flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32I : BackPatchInfo::FLAG_SIZE_F32X2I); + else + flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2); + EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, @@ -160,10 +167,17 @@ void JitArm64::psq_st(UGeckoInstruction inst) } else { - if (inst.W) - m_float_emit.FCVT(32, 64, D0, VS); + if (single) + { + m_float_emit.ORR(D0, VS, VS); + } else - m_float_emit.FCVTN(32, D0, VS); + { + if (inst.W) + m_float_emit.FCVT(32, 64, D0, VS); + else + m_float_emit.FCVTN(32, D0, VS); + } LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); UBFM(type_reg, scale_reg, 0, 2); // Type diff --git a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h index c712918883..4e9f7ca199 100644 --- a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h +++ b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h @@ -9,22 +9,24 @@ struct BackPatchInfo { enum { - FLAG_STORE = (1 << 0), - FLAG_LOAD = (1 << 1), - FLAG_SIZE_8 = (1 << 2), - FLAG_SIZE_16 = (1 << 3), - FLAG_SIZE_32 = (1 << 4), - FLAG_SIZE_F32 = (1 << 5), - FLAG_SIZE_F32X2 = (1 << 6), - FLAG_SIZE_F64 = (1 << 7), - FLAG_REVERSE = (1 << 8), - FLAG_EXTEND = (1 << 9), - FLAG_SIZE_F32I = (1 << 10), - FLAG_ZERO_256 = (1 << 11), - FLAG_MASK_FLOAT = FLAG_SIZE_F32 | - FLAG_SIZE_F32X2 | - FLAG_SIZE_F64 | - FLAG_SIZE_F32I, + FLAG_STORE = (1 << 0), + FLAG_LOAD = (1 << 1), + FLAG_SIZE_8 = (1 << 2), + FLAG_SIZE_16 = (1 << 3), + 
FLAG_SIZE_32 = (1 << 4), + FLAG_SIZE_F32 = (1 << 5), + FLAG_SIZE_F32X2 = (1 << 6), + FLAG_SIZE_F32X2I = (1 << 7), + FLAG_SIZE_F64 = (1 << 8), + FLAG_REVERSE = (1 << 9), + FLAG_EXTEND = (1 << 10), + FLAG_SIZE_F32I = (1 << 11), + FLAG_ZERO_256 = (1 << 12), + FLAG_MASK_FLOAT = FLAG_SIZE_F32 | + FLAG_SIZE_F32X2 | + FLAG_SIZE_F32X2I | + FLAG_SIZE_F64 | + FLAG_SIZE_F32I, }; static u32 GetFlagSize(u32 flags) From c30a66b2d5506369bfe5393b54aa74e8ad8c9d52 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 21 Feb 2016 10:38:24 +0100 Subject: [PATCH 14/19] JitArm64: Track singles in fabs. --- .../JitArm64/JitArm64_FloatingPoint.cpp | 32 ++++++++------ .../JitArm64/JitArm64_LoadStoreFloating.cpp | 2 +- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 43 +++++++++++++++---- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 3 +- 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index e6b512cf2b..d3cab02c0b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -33,7 +33,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst) bool use_c = op5 >= 25; // fmul and all kind of fmaddXX bool use_b = op5 != 25; // fmul uses no B - bool inputs_are_singles = fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c)); + bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && (!use_c || fpr.IsSingle(c, !packed)); ARM64Reg VA, VB, VC, VD; @@ -105,13 +105,13 @@ void JitArm64::fp_logic(UGeckoInstruction inst) if (op10 == 72 && b == d) return; - bool is_single = fpr.IsSingle(b); + bool single = fpr.IsSingle(b, !packed); + u8 size = single ? 32 : 64; if (packed) { - RegType type = is_single ? REG_REG_SINGLE : REG_REG; - u8 size = is_single ? 32 : 64; - ARM64Reg (*reg_encoder)(ARM64Reg) = is_single ? 
EncodeRegToDouble : EncodeRegToQuad; + RegType type = single ? REG_REG_SINGLE : REG_REG; + ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToDouble : EncodeRegToQuad; ARM64Reg VB = reg_encoder(fpr.R(b, type)); ARM64Reg VD = reg_encoder(fpr.RW(d, type)); @@ -128,16 +128,20 @@ void JitArm64::fp_logic(UGeckoInstruction inst) } else { - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); + RegType type = single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; + RegType type2 = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR; + ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToSingle : EncodeRegToDouble; + + ARM64Reg VB = fpr.R(b, type); + ARM64Reg VD = fpr.RW(d, type2); switch (op10) { - case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; - case 72: m_float_emit.INS(64, VD, 0, VB, 0); break; - case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break; - case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; + case 40: m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VB)); break; + case 72: m_float_emit.INS(size, VD, 0, VB, 0); break; + case 136: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); + m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VD)); break; + case 264: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); break; default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; } } @@ -169,7 +173,7 @@ void JitArm64::frspx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; - if (fpr.IsSingle(b)) + if (fpr.IsSingle(b, true)) { // Source is already in single precision, so no need to do anything but to copy to PSR1. 
ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE); @@ -196,7 +200,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB; int crf = inst.CRFD; - bool singles = fpr.IsSingle(a) && fpr.IsSingle(b); + bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true); RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToSingle : EncodeRegToDouble; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 5fe4628a8d..946a51a07d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -270,7 +270,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) gpr.Lock(W0, W1, W30); fpr.Lock(Q0); - bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS); + bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true); ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 29df1314fe..3a7c93f55a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -327,6 +327,18 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) { return host_reg; } + case REG_LOWER_PAIR_SINGLE: + { + // We're asked for the lower single, so just return the register. + if (type == REG_IS_LOADED_SINGLE) + return host_reg; + + // Else convert this register back to a double. 
+ m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + reg.LoadLowerReg(host_reg); + + // fall through + } case REG_LOWER_PAIR: { if (type == REG_REG) @@ -417,31 +429,40 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type) } // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty. - if (type == REG_LOWER_PAIR && was_dirty) + if ((type == REG_LOWER_PAIR || type == REG_LOWER_PAIR_SINGLE) && was_dirty) { + // We must *not* change host_reg as this register might still be in use. So it's fine to + // store this register, but it's *not* fine to convert it to double. So for double conversion, + // a temporary register needs to be used. ARM64Reg host_reg = reg.GetReg(); + ARM64Reg flush_reg = host_reg; switch (reg.GetType()) { case REG_REG_SINGLE: - m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + flush_reg = GetReg(); + m_float_emit->FCVTL(64, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg)); // fall through case REG_REG: // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. - m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0])); break; case REG_DUP_SINGLE: - m_float_emit->FCVT(64, 32, EncodeRegToDouble(reg.GetReg()), EncodeRegToDouble(reg.GetReg())); + flush_reg = GetReg(); + m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg)); // fall through case REG_DUP: // Store PSR1 (which is equal to PSR0) in memory. - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1])); break; default: // All other types doesn't store anything in PSR1.
break; } + + if (host_reg != flush_reg) + Unlock(flush_reg); } reg.Load(reg.GetReg(), type); @@ -502,11 +523,15 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); type = REG_REG; } - if (type == REG_DUP_SINGLE) + if (type == REG_DUP_SINGLE || type == REG_LOWER_PAIR_SINGLE) { if (dirty) m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - type = REG_DUP; + + if (type == REG_DUP_SINGLE) + type = REG_DUP; + else + type = REG_LOWER_PAIR; } if (type == REG_REG || type == REG_LOWER_PAIR) @@ -560,10 +585,10 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() return registers; } -bool Arm64FPRCache::IsSingle(u32 preg) +bool Arm64FPRCache::IsSingle(u32 preg, bool lower_only) { RegType type = m_guest_registers[preg].GetType(); - return type == REG_REG_SINGLE || type == REG_DUP_SINGLE; + return type == REG_REG_SINGLE || type == REG_DUP_SINGLE || (lower_only && type == REG_LOWER_PAIR_SINGLE); } void Arm64FPRCache::FixSinglePrecision(u32 preg) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index dcf33a0bb5..ae4e079f45 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -24,6 +24,7 @@ enum RegType REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value) REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded REG_REG_SINGLE, // Both registers are loaded as single + REG_LOWER_PAIR_SINGLE, // Only the lower pair of a paired register, as single REG_DUP_SINGLE, // The lower one contains both registers, as single REG_IS_LOADED_SINGLE, // We only want to access the lower one as single }; @@ -296,7 +297,7 @@ public: BitSet32 GetCallerSavedUsed() override; - bool IsSingle(u32 preg); + bool IsSingle(u32 preg, bool 
lower_only = false); void FixSinglePrecision(u32 preg); From 2112d8dfda4a03da715ef47f09a9b8d6d49d323b Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 21 Feb 2016 10:42:11 +0100 Subject: [PATCH 15/19] JitArm64: Replace REG_IS_LOADED with REG_LOWER_PAIR --- .../JitArm64/JitArm64_FloatingPoint.cpp | 21 +++++++++---------- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 2 +- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 6 +++--- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 2 -- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index d3cab02c0b..2be07aa2fc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -61,7 +61,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst) } else { - RegType type = (inputs_are_singles && single) ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; + RegType type = (inputs_are_singles && single) ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR; RegType type_out = single ? (inputs_are_singles ? REG_DUP_SINGLE : REG_DUP) : REG_LOWER_PAIR; ARM64Reg (*reg_encoder)(ARM64Reg) = (inputs_are_singles && single) ? EncodeRegToSingle : EncodeRegToDouble; @@ -128,12 +128,11 @@ void JitArm64::fp_logic(UGeckoInstruction inst) } else { - RegType type = single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; - RegType type2 = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR; + RegType type = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR; ARM64Reg (*reg_encoder)(ARM64Reg) = single ? 
EncodeRegToSingle : EncodeRegToDouble; ARM64Reg VB = fpr.R(b, type); - ARM64Reg VD = fpr.RW(d, type2); + ARM64Reg VD = fpr.RW(d, type); switch (op10) { @@ -155,9 +154,9 @@ void JitArm64::fselx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_IS_LOADED); - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VC = fpr.R(c, REG_IS_LOADED); + ARM64Reg VA = fpr.R(a, REG_LOWER_PAIR); + ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR); + ARM64Reg VC = fpr.R(c, REG_LOWER_PAIR); ARM64Reg VD = fpr.RW(d); m_float_emit.FCMPE(EncodeRegToDouble(VA)); @@ -176,7 +175,7 @@ void JitArm64::frspx(UGeckoInstruction inst) if (fpr.IsSingle(b, true)) { // Source is already in single precision, so no need to do anything but to copy to PSR1. - ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE); + ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR_SINGLE); ARM64Reg VD = fpr.RW(d, REG_DUP_SINGLE); if (b != d) @@ -184,7 +183,7 @@ void JitArm64::frspx(UGeckoInstruction inst) } else { - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR); ARM64Reg VD = fpr.RW(d, REG_DUP_SINGLE); m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); @@ -201,7 +200,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) int crf = inst.CRFD; bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true); - RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; + RegType type = singles ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR; ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? 
EncodeRegToSingle : EncodeRegToDouble; ARM64Reg VA = reg_encoder(fpr.R(a, type)); @@ -266,7 +265,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); + ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR); ARM64Reg VD = fpr.RW(d); ARM64Reg V0 = fpr.GetReg(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 946a51a07d..e1e7865ff0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -272,7 +272,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true); - ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED); + ARM64Reg V0 = fpr.R(inst.FS, single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR); if (single) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 3a7c93f55a..0d7708f96f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -314,7 +314,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) case REG_REG_SINGLE: { // We're asked for singles, so just return the register. - if (type == REG_REG_SINGLE || type == REG_IS_LOADED_SINGLE) + if (type == REG_REG_SINGLE || type == REG_LOWER_PAIR_SINGLE) return host_reg; // Else convert this register back to doubles. @@ -330,7 +330,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) case REG_LOWER_PAIR_SINGLE: { // We're asked for the lower single, so just return the register. - if (type == REG_IS_LOADED_SINGLE) + if (type == REG_LOWER_PAIR_SINGLE) return host_reg; // Else convert this register back to a double. 
@@ -356,7 +356,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) } case REG_DUP_SINGLE: { - if (type == REG_IS_LOADED_SINGLE) + if (type == REG_LOWER_PAIR_SINGLE) return host_reg; if (type == REG_REG_SINGLE) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index ae4e079f45..accf081e2b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -22,11 +22,9 @@ enum RegType REG_IMM, // Reg is really a IMM REG_LOWER_PAIR, // Only the lower pair of a paired register REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value) - REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded REG_REG_SINGLE, // Both registers are loaded as single REG_LOWER_PAIR_SINGLE, // Only the lower pair of a paired register, as single REG_DUP_SINGLE, // The lower one contains both registers, as single - REG_IS_LOADED_SINGLE, // We only want to access the lower one as single }; enum FlushMode From b4d0307b254f8be787e9245d59c83247d48bc847 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 25 Feb 2016 08:40:59 +0100 Subject: [PATCH 16/19] JitArm64: Remove LoadTo* helpers. 
--- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 26 +++++++++--------- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 27 +------------------ 2 files changed, 14 insertions(+), 39 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 0d7708f96f..77eed0a70b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -198,7 +198,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg) { ARM64Reg host_reg = GetReg(); m_emit->MOVI2R(host_reg, reg.GetImm()); - reg.LoadToReg(host_reg); + reg.Load(host_reg); reg.SetDirty(true); return host_reg; } @@ -208,7 +208,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg) // This is a bit annoying. We try to keep these preloaded as much as possible // This can also happen on cases where PPCAnalyst isn't feeing us proper register usage statistics ARM64Reg host_reg = GetReg(); - reg.LoadToReg(host_reg); + reg.Load(host_reg); reg.SetDirty(false); m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); return host_reg; @@ -240,7 +240,7 @@ void Arm64GPRCache::BindToRegister(u32 preg, bool do_load) if (reg.GetType() == REG_NOTLOADED) { ARM64Reg host_reg = GetReg(); - reg.LoadToReg(host_reg); + reg.Load(host_reg); if (do_load) m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); } @@ -319,7 +319,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) // Else convert this register back to doubles. m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - reg.LoadToReg(host_reg); + reg.Load(host_reg, REG_REG); // fall through } @@ -335,7 +335,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) // Else convert this register back to a double. 
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - reg.LoadLowerReg(host_reg); + reg.Load(host_reg, REG_LOWER_PAIR); // fall through } @@ -350,7 +350,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) UnlockRegister(tmp_reg); // Change it over to a full 128bit register - reg.LoadToReg(host_reg); + reg.Load(host_reg, REG_REG); } return host_reg; } @@ -363,12 +363,12 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) { // Duplicate to the top and change over m_float_emit->INS(32, host_reg, 1, host_reg, 0); - reg.LoadToRegSingle(host_reg); + reg.Load(host_reg, REG_REG_SINGLE); return host_reg; } m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - reg.LoadDup(host_reg); + reg.Load(host_reg, REG_DUP); // fall through } @@ -380,7 +380,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) // but we are only available in the lower 64bits // Duplicate to the top and change over m_float_emit->INS(64, host_reg, 1, host_reg, 0); - reg.LoadToReg(host_reg); + reg.Load(host_reg, REG_REG); } return host_reg; } @@ -391,12 +391,12 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) if (type == REG_REG) { load_size = 128; - reg.LoadToReg(host_reg); + reg.Load(host_reg, REG_REG); } else { load_size = 64; - reg.LoadLowerReg(host_reg); + reg.Load(host_reg, REG_LOWER_PAIR); } reg.SetDirty(false); m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); @@ -599,11 +599,11 @@ void Arm64FPRCache::FixSinglePrecision(u32 preg) { case REG_DUP: // only PS0 needs to be converted m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - reg.LoadDupSingle(host_reg); + reg.Load(host_reg, REG_DUP_SINGLE); break; case REG_REG: // PS0 and PS1 needs to be converted m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - reg.LoadToRegSingle(host_reg); + reg.Load(host_reg, REG_REG_SINGLE); break; default: break; diff --git 
a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index accf081e2b..6616e4b6fb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -58,36 +58,11 @@ public: { return m_value; } - void Load(ARM64Reg reg, RegType type) + void Load(ARM64Reg reg, RegType type = REG_REG) { m_type = type; m_reg = reg; } - void LoadToReg(ARM64Reg reg) - { - m_type = REG_REG; - m_reg = reg; - } - void LoadToRegSingle(ARM64Reg reg) - { - m_type = REG_REG_SINGLE; - m_reg = reg; - } - void LoadLowerReg(ARM64Reg reg) - { - m_type = REG_LOWER_PAIR; - m_reg = reg; - } - void LoadDup(ARM64Reg reg) - { - m_type = REG_DUP; - m_reg = reg; - } - void LoadDupSingle(ARM64Reg reg) - { - m_type = REG_DUP_SINGLE; - m_reg = reg; - } void LoadToImm(u32 imm) { m_type = REG_IMM; From e0793a274fce58bebab0804afba65f9b0a159a1a Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 25 Feb 2016 08:48:09 +0100 Subject: [PATCH 17/19] JitArm64: Track singles in fselx. 
--- .../JitArm64/JitArm64_FloatingPoint.cpp | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 2be07aa2fc..4e4dcf6729 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -154,13 +154,26 @@ void JitArm64::fselx(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_LOWER_PAIR); - ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR); - ARM64Reg VC = fpr.R(c, REG_LOWER_PAIR); - ARM64Reg VD = fpr.RW(d); + if (fpr.IsSingle(a, true)) + { + ARM64Reg VA = fpr.R(a, REG_LOWER_PAIR_SINGLE); + m_float_emit.FCMPE(EncodeRegToSingle(VA)); + } + else + { + ARM64Reg VA = fpr.R(a, REG_LOWER_PAIR); + m_float_emit.FCMPE(EncodeRegToDouble(VA)); + } - m_float_emit.FCMPE(EncodeRegToDouble(VA)); - m_float_emit.FCSEL(EncodeRegToDouble(VD), EncodeRegToDouble(VC), EncodeRegToDouble(VB), CC_GE); + bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true); + RegType type = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR; + ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToSingle : EncodeRegToDouble; + + ARM64Reg VB = fpr.R(b, type); + ARM64Reg VC = fpr.R(c, type); + ARM64Reg VD = fpr.RW(d, type); + + m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE); } void JitArm64::frspx(UGeckoInstruction inst) From 141e2991619719b590cf0864964950dca4d3a51f Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 25 Feb 2016 08:52:26 +0100 Subject: [PATCH 18/19] JitArm64: Track single precision in fctiwzx. 
--- .../PowerPC/JitArm64/JitArm64_FloatingPoint.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 4e4dcf6729..d38a959a8f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -278,7 +278,9 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; - ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR); + bool single = fpr.IsSingle(b, true); + + ARM64Reg VB = fpr.R(b, single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR); ARM64Reg VD = fpr.RW(d); ARM64Reg V0 = fpr.GetReg(); @@ -287,8 +289,15 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF000000000000ULL); m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); - m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); - m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z); + if (single) + { + m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), ROUND_Z); + } + else + { + m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); + m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z); + } m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); fpr.Unlock(V0); } From 3286bbd9bdb522cb47c3113351886d17fa19ed89 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 25 Feb 2016 09:02:28 +0100 Subject: [PATCH 19/19] JitArm64: Track single precision in ps_sel. 
--- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index d6f4568100..afc4bc4a91 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -188,23 +188,29 @@ void JitArm64::ps_sel(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); + bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c); + RegType type = singles ? REG_REG_SINGLE : REG_REG; + u8 size = singles ? 32 : 64; + ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad; - if (d != a && d != b && d != c) + ARM64Reg VA = reg_encoder(fpr.R(a, type)); + ARM64Reg VB = reg_encoder(fpr.R(b, type)); + ARM64Reg VC = reg_encoder(fpr.R(c, type)); + ARM64Reg VD = reg_encoder(fpr.RW(d, type)); + + if (d != b && d != c) { - m_float_emit.FCMGE(64, VD, VA); + m_float_emit.FCMGE(size, VD, VA); m_float_emit.BSL(VD, VC, VB); } else { - ARM64Reg V0 = fpr.GetReg(); - m_float_emit.FCMGE(64, V0, VA); + ARM64Reg V0Q = fpr.GetReg(); + ARM64Reg V0 = reg_encoder(V0Q); + m_float_emit.FCMGE(size, V0, VA); m_float_emit.BSL(V0, VC, VB); m_float_emit.ORR(VD, V0, V0); - fpr.Unlock(V0); + fpr.Unlock(V0Q); } }