diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index e6b512cf2b..d3cab02c0b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -33,7 +33,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst) bool use_c = op5 >= 25; // fmul and all kind of fmaddXX bool use_b = op5 != 25; // fmul uses no B - bool inputs_are_singles = fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c)); + bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && (!use_c || fpr.IsSingle(c, !packed)); ARM64Reg VA, VB, VC, VD; @@ -105,13 +105,13 @@ void JitArm64::fp_logic(UGeckoInstruction inst) if (op10 == 72 && b == d) return; - bool is_single = fpr.IsSingle(b); + bool single = fpr.IsSingle(b, !packed); + u8 size = single ? 32 : 64; if (packed) { - RegType type = is_single ? REG_REG_SINGLE : REG_REG; - u8 size = is_single ? 32 : 64; - ARM64Reg (*reg_encoder)(ARM64Reg) = is_single ? EncodeRegToDouble : EncodeRegToQuad; + RegType type = single ? REG_REG_SINGLE : REG_REG; + ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToDouble : EncodeRegToQuad; ARM64Reg VB = reg_encoder(fpr.R(b, type)); ARM64Reg VD = reg_encoder(fpr.RW(d, type)); @@ -128,16 +128,20 @@ void JitArm64::fp_logic(UGeckoInstruction inst) } else { - ARM64Reg VB = fpr.R(b, REG_IS_LOADED); - ARM64Reg VD = fpr.RW(d); + RegType type = single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; + RegType type2 = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR; + ARM64Reg (*reg_encoder)(ARM64Reg) = single ? 
EncodeRegToSingle : EncodeRegToDouble; + + ARM64Reg VB = fpr.R(b, type); + ARM64Reg VD = fpr.RW(d, type2); switch (op10) { - case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; - case 72: m_float_emit.INS(64, VD, 0, VB, 0); break; - case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); - m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break; - case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; + case 40: m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VB)); break; + case 72: m_float_emit.INS(size, VD, 0, VB, 0); break; + case 136: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); + m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VD)); break; + case 264: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); break; default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; } } @@ -169,7 +173,7 @@ void JitArm64::frspx(UGeckoInstruction inst) u32 b = inst.FB, d = inst.FD; - if (fpr.IsSingle(b)) + if (fpr.IsSingle(b, true)) { // Source is already in single precision, so no need to do anything but to copy to PSR1. ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE); @@ -196,7 +200,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) u32 a = inst.FA, b = inst.FB; int crf = inst.CRFD; - bool singles = fpr.IsSingle(a) && fpr.IsSingle(b); + bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true); RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? 
EncodeRegToSingle : EncodeRegToDouble; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 5fe4628a8d..946a51a07d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -270,7 +270,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) gpr.Lock(W0, W1, W30); fpr.Lock(Q0); - bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS); + bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true); ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 29df1314fe..3a7c93f55a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -327,6 +327,18 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) { return host_reg; } + case REG_LOWER_PAIR_SINGLE: + { + // We're asked for the lower single, so just return the register. + if (type == REG_IS_LOADED_SINGLE) + return host_reg; + + // Else convert this register back to a double. + m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + reg.LoadLowerReg(host_reg); + + // fall through + } case REG_LOWER_PAIR: { if (type == REG_REG) @@ -417,31 +429,40 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type) } // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty. - if (type == REG_LOWER_PAIR && was_dirty) + if ((type == REG_LOWER_PAIR || type == REG_LOWER_PAIR_SINGLE) && was_dirty) { + // We must *not* change host_reg as this register might still be in use. So it's fine to + // store this register, but it's *not* fine to convert it to double. 
So for double conversion, + // a temporary register needs to be used. ARM64Reg host_reg = reg.GetReg(); + ARM64Reg flush_reg = host_reg; switch (reg.GetType()) { case REG_REG_SINGLE: - m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + flush_reg = GetReg(); + m_float_emit->FCVTL(64, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg)); // fall through case REG_REG: // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. - m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0])); break; case REG_DUP_SINGLE: - m_float_emit->FCVT(64, 32, EncodeRegToDouble(reg.GetReg()), EncodeRegToDouble(reg.GetReg())); + flush_reg = GetReg(); + m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg)); // fall through case REG_DUP: // Store PSR1 (which is equal to PSR0) in memory. - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1])); break; default: // All other types doesn't store anything in PSR1. 
break; } + + if (host_reg != flush_reg) + Unlock(flush_reg); } reg.Load(reg.GetReg(), type); @@ -502,11 +523,15 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); type = REG_REG; } - if (type == REG_DUP_SINGLE) + if (type == REG_DUP_SINGLE || type == REG_LOWER_PAIR_SINGLE) { if (dirty) m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); - type = REG_DUP; + + if (type == REG_DUP_SINGLE) + type = REG_DUP; + else + type = REG_LOWER_PAIR; } if (type == REG_REG || type == REG_LOWER_PAIR) @@ -560,10 +585,10 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() return registers; } -bool Arm64FPRCache::IsSingle(u32 preg) +bool Arm64FPRCache::IsSingle(u32 preg, bool lower_only) { RegType type = m_guest_registers[preg].GetType(); - return type == REG_REG_SINGLE || type == REG_DUP_SINGLE; + return type == REG_REG_SINGLE || type == REG_DUP_SINGLE || (lower_only && type == REG_LOWER_PAIR_SINGLE); } void Arm64FPRCache::FixSinglePrecision(u32 preg) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index dcf33a0bb5..ae4e079f45 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -24,6 +24,7 @@ enum RegType REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value) REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded REG_REG_SINGLE, // Both registers are loaded as single + REG_LOWER_PAIR_SINGLE, // Only the lower pair of a paired register, as single REG_DUP_SINGLE, // The lower one contains both registers, as single REG_IS_LOADED_SINGLE, // We only want to access the lower one as single }; @@ -296,7 +297,7 @@ public: BitSet32 GetCallerSavedUsed() override; - bool IsSingle(u32 preg); + bool IsSingle(u32 preg, bool 
lower_only = false); void FixSinglePrecision(u32 preg);