diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp index 5570e7d860..bfd9e4c1e4 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp @@ -128,7 +128,7 @@ X64Reg RegCache::GetFreeXReg() int preg = xregs[xr].ppcReg; if (!locks[preg]) { - StoreFromX64(preg); + StoreFromRegister(preg); return xr; } } @@ -159,7 +159,7 @@ void RegCache::FlushR(X64Reg reg) PanicAlert("Flushing non existent reg"); if (!xregs[reg].free) { - StoreFromX64(xregs[reg].ppcReg); + StoreFromRegister(xregs[reg].ppcReg); } } @@ -185,9 +185,12 @@ void RegCache::DiscardRegContentsIfCached(int preg) { if (regs[preg].away && regs[preg].location.IsSimpleReg()) { - xregs[regs[preg].location.GetSimpleReg()].free = true; - xregs[regs[preg].location.GetSimpleReg()].dirty = false; + X64Reg xr = regs[preg].location.GetSimpleReg(); + xregs[xr].free = true; + xregs[xr].dirty = false; + xregs[xr].ppcReg = -1; regs[preg].away = false; + regs[preg].location = GetDefaultLocation(preg); } } @@ -252,15 +255,18 @@ OpArg FPURegCache::GetDefaultLocation(int reg) const return M(&ppcState.ps[reg][0]); } -void RegCache::KillImmediate(int preg) +void RegCache::KillImmediate(int preg, bool doLoad, bool makeDirty) { - if (regs[preg].away && regs[preg].location.IsImm()) + if (regs[preg].away) { - LoadToX64(preg, true, true); + if (regs[preg].location.IsImm()) + BindToRegister(preg, doLoad, makeDirty); + else if (regs[preg].location.IsSimpleReg()) + xregs[RX(preg)].dirty |= makeDirty; } } -void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty) +void GPRRegCache::BindToRegister(int i, bool doLoad, bool makeDirty) { if (!regs[i].away && regs[i].location.IsImm()) PanicAlert("Bad immediate"); @@ -297,7 +303,7 @@ void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty) } } -void GPRRegCache::StoreFromX64(int i) +void GPRRegCache::StoreFromRegister(int i) { if (regs[i].away) { @@ -316,14 +322,14 @@ void GPRRegCache::StoreFromX64(int i) doStore = true; } OpArg newLoc = GetDefaultLocation(i); - // if (doStore) //<-- Breaks JIT compilation + if (doStore) emit->MOV(32, newLoc, regs[i].location); regs[i].location = newLoc; regs[i].away = false; } } -void FPURegCache::LoadToX64(int i, bool doLoad, bool makeDirty) +void FPURegCache::BindToRegister(int i, bool doLoad, bool makeDirty) { _assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - load - imm"); if (!regs[i].away) @@ -351,7 +357,7 @@ void FPURegCache::LoadToX64(int i, bool doLoad, bool makeDirty) } } -void FPURegCache::StoreFromX64(int i) +void FPURegCache::StoreFromRegister(int i) { _assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - store - imm"); if (regs[i].away) @@ -389,12 +395,12 @@ void RegCache::Flush(FlushMode mode) if (regs[i].location.IsSimpleReg()) { X64Reg xr = RX(i); - StoreFromX64(i); + StoreFromRegister(i); xregs[xr].dirty = false; } else if (regs[i].location.IsImm()) { - StoreFromX64(i); + StoreFromRegister(i); } else { diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h index e93db1fd6d..cefa0e13f7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h @@ -96,12 +96,12 @@ public: virtual void Flush(FlushMode mode); virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);} int SanityCheck() const; - void KillImmediate(int preg); + void KillImmediate(int preg, bool doLoad, bool makeDirty); //TODO - instead of doload, use "read", "write" //read only will not set dirty flag - virtual void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true) = 0; - virtual void StoreFromX64(int preg) = 0; + virtual void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true) = 0; + virtual void StoreFromRegister(int preg) = 0; const OpArg &R(int preg) const {return regs[preg].location;} X64Reg RX(int preg) const @@ -131,8 +131,8 @@ class GPRRegCache : public RegCache { public: void Start(PPCAnalyst::BlockRegStats &stats); - void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true); - void StoreFromX64(int preg); + void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true); + void StoreFromRegister(int preg); OpArg GetDefaultLocation(int reg) const; const int *GetAllocationOrder(int &count); void SetImmediate32(int preg, u32 immValue); @@ -143,8 +143,8 @@ class FPURegCache : public RegCache { public: void Start(PPCAnalyst::BlockRegStats &stats); - void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true); - void StoreFromX64(int preg); + void BindToRegister(int preg, bool doLoad = true, bool makeDirty = true); + void StoreFromRegister(int preg); const int *GetAllocationOrder(int &count); OpArg GetDefaultLocation(int reg) const; }; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index 663fc67891..5c9013a78c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -34,24 +34,24 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm fpr.Lock(d, a, b); if (d == a) { - fpr.LoadToX64(d, true); + fpr.BindToRegister(d, true); (this->*op)(fpr.RX(d), fpr.R(b)); } else if (d == b && reversible) { - fpr.LoadToX64(d, true); + fpr.BindToRegister(d, true); (this->*op)(fpr.RX(d), fpr.R(a)); } else if (a != d && b != d) { // Sources different from d, can use rather quick solution - fpr.LoadToX64(d, !dupe); + fpr.BindToRegister(d, !dupe); MOVSD(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), fpr.R(b)); } else if (b != d) { - fpr.LoadToX64(d, !dupe); + fpr.BindToRegister(d, !dupe); MOVSD(XMM0, fpr.R(b)); MOVSD(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), Gen::R(XMM0)); @@ -60,7 +60,7 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm { MOVSD(XMM0, fpr.R(a)); MOVSD(XMM1, fpr.R(b)); - fpr.LoadToX64(d, !dupe); + fpr.BindToRegister(d, !dupe); (this->*op)(XMM0, Gen::R(XMM1)); MOVSD(fpr.RX(d), Gen::R(XMM0)); } @@ -87,7 +87,7 @@ void Jit64::fp_arith_s(UGeckoInstruction inst) int d = inst.FD; int b = inst.FB; fpr.Lock(b, d); - fpr.LoadToX64(d, true, true); + fpr.BindToRegister(d, true, true); MOVSD(XMM0, M((void *)&one_const)); SQRTSD(XMM1, fpr.R(b)); DIVSD(XMM0, R(XMM1)); @@ -160,7 +160,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst) XORPD(XMM0, M((void*)&psSignBits2)); break; } - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); //YES it is necessary to dupe the result :( //TODO : analysis - does the top reg get used? If so, dupe, if not, don't. if (single_precision) { @@ -186,7 +186,7 @@ void Jit64::fsign(UGeckoInstruction inst) int d = inst.FD; int b = inst.FB; fpr.Lock(b, d); - fpr.LoadToX64(d, true, true); + fpr.BindToRegister(d, true, true); MOVSD(XMM0, fpr.R(b)); switch (inst.SUBOP10) { case 40: // fnegx @@ -216,7 +216,7 @@ void Jit64::fmrx(UGeckoInstruction inst) int d = inst.FD; int b = inst.FB; fpr.Lock(b, d); - fpr.LoadToX64(d, true, true); + fpr.BindToRegister(d, true, true); MOVSD(XMM0, fpr.R(b)); MOVSD(fpr.R(d), XMM0); fpr.UnlockAll(); @@ -238,7 +238,7 @@ void Jit64::fcmpx(UGeckoInstruction inst) int crf = inst.CRFD; fpr.Lock(a,b); - if (a != b) fpr.LoadToX64(a, true); + if (a != b) fpr.BindToRegister(a, true); // Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception? UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 6ba6b018d5..2ae6e77705 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -84,8 +84,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void } else { - if (gpr.R(d).IsImm()) - gpr.LoadToX64(d, false); + gpr.KillImmediate(d, true, true); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; if (carry) GenerateCarry(); @@ -93,7 +92,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void } else { - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; if (carry) @@ -107,7 +106,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void #ifdef __APPLE__ // XXX soren // FIXME: Seems to be required on OS X (see r5799) - gpr.StoreFromX64(d); + gpr.StoreFromRegister(d); #endif } else @@ -135,7 +134,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16); } else if (inst.SIMM_16 == 0 && d != a && a != 0) { gpr.Lock(a, d); - gpr.LoadToX64(d, false, true); + gpr.BindToRegister(d, false, true); MOV(32, gpr.R(d), gpr.R(a)); gpr.UnlockAll(); } else { @@ -150,7 +149,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) + (u32)(s32)js.next_inst.SIMM_16); #ifdef __APPLE__ // FIXME: Seems to be required on OS X (see r5799) - gpr.StoreFromX64(d); + gpr.StoreFromRegister(d); #endif js.downcountAmount++; js.skipnext = true; @@ -160,7 +159,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) gpr.SetImmediate32(d, ((u32)inst.SIMM_16 << 16) | (u32)js.next_inst.UIMM); #ifdef __APPLE__ // FIXME: Seems to be required on OS X (see r5799) - gpr.StoreFromX64(d); + gpr.StoreFromRegister(d); #endif js.downcountAmount++; js.skipnext = true; @@ -221,7 +220,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) OpArg comparand; if (inst.OPCD == 31) { gpr.Lock(a, b); - gpr.LoadToX64(a, true, false); + gpr.BindToRegister(a, true, false); comparand = gpr.R(b); if (inst.SUBOP10 == 32) { //cmpl @@ -234,7 +233,8 @@ void Jit64::cmpXX(UGeckoInstruction inst) } } else { - gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference + gpr.Lock(a); + gpr.KillImmediate(a, true, false); // todo, optimize instead, but unlikely to make a difference if (inst.OPCD == 10) { //cmpli less_than = CC_B; @@ -355,14 +355,14 @@ void Jit64::orx(UGeckoInstruction inst) if (s == b && s != a) { gpr.Lock(a,s); - gpr.LoadToX64(a, false); + gpr.BindToRegister(a, false); MOV(32, gpr.R(a), gpr.R(s)); gpr.UnlockAll(); } else { gpr.Lock(a, s, b); - gpr.LoadToX64(a, (a == s || a == b), true); + gpr.BindToRegister(a, (a == s || a == b), true); if (a == s) OR(32, gpr.R(a), gpr.R(b)); else if (a == b) @@ -385,12 +385,8 @@ void Jit64::orcx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, s = inst.RS, b = inst.RB; - if (a != s && a != b) { - gpr.LoadToX64(a, false, true); - } else { - gpr.LoadToX64(a, true, true); - } gpr.Lock(a, s, b); + gpr.BindToRegister(a, (a == s || a == b), true); MOV(32, R(EAX), gpr.R(b)); NOT(32, R(EAX)); OR(32, R(EAX), gpr.R(s)); @@ -414,7 +410,7 @@ void Jit64::norx(UGeckoInstruction inst) if (s == b && s != a) { gpr.Lock(a,s); - gpr.LoadToX64(a, false); + gpr.BindToRegister(a, false); MOV(32, gpr.R(a), gpr.R(s)); NOT(32, gpr.R(a)); gpr.UnlockAll(); @@ -422,7 +418,7 @@ void Jit64::norx(UGeckoInstruction inst) else { gpr.Lock(a, s, b); - gpr.LoadToX64(a, (a == s || a == b), true); + gpr.BindToRegister(a, (a == s || a == b), true); if (a == s) OR(32, gpr.R(a), gpr.R(b)); else if (a == b) @@ -456,8 +452,8 @@ void Jit64::xorx(UGeckoInstruction inst) } else { - gpr.LoadToX64(a, a == s || a == b, true); gpr.Lock(a, s, b); + gpr.BindToRegister(a, a == s || a == b, true); MOV(32, R(EAX), gpr.R(s)); XOR(32, R(EAX), gpr.R(b)); MOV(32, gpr.R(a), R(EAX)); @@ -483,8 +479,8 @@ void Jit64::eqvx(UGeckoInstruction inst) } else { - gpr.LoadToX64(a, a == s || a == b, true); gpr.Lock(a, s, b); + gpr.BindToRegister(a, a == s || a == b, true); MOV(32, R(EAX), gpr.R(s)); XOR(32, R(EAX), gpr.R(b)); NOT(32, R(EAX)); @@ -503,12 +499,8 @@ void Jit64::andx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, s = inst.RS, b = inst.RB; - if (a != s && a != b) { - gpr.LoadToX64(a, false, true); - } else { - gpr.LoadToX64(a, true, true); - } gpr.Lock(a, s, b); + gpr.BindToRegister(a, (a == s || a == b), true); MOV(32, R(EAX), gpr.R(s)); AND(32, R(EAX), gpr.R(b)); MOV(32, gpr.R(a), R(EAX)); @@ -525,12 +517,8 @@ void Jit64::nandx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, s = inst.RS, b = inst.RB; - if (a != s && a != b) { - gpr.LoadToX64(a, false, true); - } else { - gpr.LoadToX64(a, true, true); - } gpr.Lock(a, s, b); + gpr.BindToRegister(a, (a == s || a == b), true); MOV(32, R(EAX), gpr.R(s)); AND(32, R(EAX), gpr.R(b)); NOT(32, R(EAX)); @@ -548,12 +536,8 @@ void Jit64::andcx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, s = inst.RS, b = inst.RB; - if (a != s && a != b) { - gpr.LoadToX64(a, false, true); - } else { - gpr.LoadToX64(a, true, true); - } gpr.Lock(a, s, b); + gpr.BindToRegister(a, (a == s || a == b), true); MOV(32, R(EAX), gpr.R(b)); NOT(32, R(EAX)); AND(32, R(EAX), gpr.R(s)); @@ -570,9 +554,9 @@ void Jit64::extsbx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(Integer) - int a = inst.RA, - s = inst.RS; - gpr.LoadToX64(a, a == s, true); + int a = inst.RA, s = inst.RS; + gpr.Lock(a, s); + gpr.BindToRegister(a, a == s, true); // Always force moving to EAX because it isn't possible // to refer to the lowest byte of some registers, at least in // 32-bit mode. @@ -581,6 +565,7 @@ void Jit64::extsbx(UGeckoInstruction inst) if (inst.Rc) { ComputeRC(gpr.R(a)); } + gpr.UnlockAll(); } void Jit64::extshx(UGeckoInstruction inst) @@ -588,8 +573,9 @@ void Jit64::extshx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, s = inst.RS; - gpr.KillImmediate(s); - gpr.LoadToX64(a, a == s, true); + gpr.Lock(a, s); + gpr.KillImmediate(s, true, false); + gpr.BindToRegister(a, a == s, true); // This looks a little dangerous, but it's safe because // every 32-bit register has a 16-bit half at the same index // as the 32-bit register. @@ -597,6 +583,7 @@ void Jit64::extshx(UGeckoInstruction inst) if (inst.Rc) { ComputeRC(gpr.R(a)); } + gpr.UnlockAll(); } void Jit64::subfic(UGeckoInstruction inst) @@ -605,7 +592,7 @@ void Jit64::subfic(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA, d = inst.RD; gpr.Lock(a, d); - gpr.LoadToX64(d, a == d, true); + gpr.BindToRegister(d, a == d, true); int imm = inst.SIMM_16; MOV(32, R(EAX), gpr.R(a)); NOT(32, R(EAX)); @@ -622,10 +609,7 @@ void Jit64::subfcx(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.Lock(a, b, d); - if(d != a && d != b) - gpr.LoadToX64(d, false, true); - else - gpr.LoadToX64(d, true, true); + gpr.BindToRegister(d, (d == a || d == b), true); // For some reason, I could not get the jit versions of sub* // working with x86 sub...so we use the ~a + b + 1 method @@ -656,10 +640,7 @@ void Jit64::subfex(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.FlushLockX(ECX); gpr.Lock(a, b, d); - if(d != a && d != b) - gpr.LoadToX64(d, false, true); - else - gpr.LoadToX64(d, true, true); + gpr.BindToRegister(d, (d == a || d == b), true); // Get CA MOV(32, R(ECX), M(&PowerPC::ppcState.spr[SPR_XER])); @@ -701,8 +682,8 @@ void Jit64::subfmex(UGeckoInstruction inst) if (d == a) { - gpr.Lock(a, d); - gpr.LoadToX64(d, true); + gpr.Lock(d); + gpr.BindToRegister(d, true); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag NOT(32, gpr.R(d)); @@ -713,7 +694,7 @@ void Jit64::subfmex(UGeckoInstruction inst) else { gpr.Lock(a, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag MOV(32, gpr.R(d), gpr.R(a)); @@ -738,8 +719,8 @@ void Jit64::subfzex(UGeckoInstruction inst) if (d == a) { - gpr.Lock(a, d); - gpr.LoadToX64(d, true); + gpr.Lock(d); + gpr.BindToRegister(d, true); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag NOT(32, gpr.R(d)); @@ -750,7 +731,7 @@ void Jit64::subfzex(UGeckoInstruction inst) else { gpr.Lock(a, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag MOV(32, gpr.R(d), gpr.R(a)); @@ -772,11 +753,7 @@ void Jit64::subfx(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.Lock(a, b, d); - if (d != a && d != b) { - gpr.LoadToX64(d, false, true); - } else { - gpr.LoadToX64(d, true, true); - } + gpr.BindToRegister(d, (d == a || d == b), true); MOV(32, R(EAX), gpr.R(b)); SUB(32, R(EAX), gpr.R(a)); MOV(32, gpr.R(d), R(EAX)); @@ -794,8 +771,8 @@ void Jit64::mulli(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA, d = inst.RD; gpr.Lock(a, d); - gpr.LoadToX64(d, (d == a), true); - gpr.KillImmediate(a); + gpr.BindToRegister(d, (d == a), true); + gpr.KillImmediate(a, true, false); IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); gpr.UnlockAll(); } @@ -806,7 +783,7 @@ void Jit64::mullwx(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.Lock(a, b, d); - gpr.LoadToX64(d, (d == a || d == b), true); + gpr.BindToRegister(d, (d == a || d == b), true); if (d == a) { IMUL(32, gpr.RX(d), gpr.R(b)); } else if (d == b) { @@ -828,15 +805,11 @@ void Jit64::mulhwux(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.FlushLockX(EDX); gpr.Lock(a, b, d); - if (d != a && d != b) { - gpr.LoadToX64(d, false, true); - } else { - gpr.LoadToX64(d, true, true); - } + gpr.BindToRegister(d, (d == a || d == b), true); if (gpr.RX(d) == EDX) PanicAlert("mulhwux : WTF"); MOV(32, R(EAX), gpr.R(a)); - gpr.KillImmediate(b); + gpr.KillImmediate(b, true, false); MUL(32, gpr.R(b)); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -852,14 +825,10 @@ void Jit64::divwux(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.FlushLockX(EDX); gpr.Lock(a, b, d); - if (d != a && d != b) { - gpr.LoadToX64(d, false, true); - } else { - gpr.LoadToX64(d, true, true); - } + gpr.BindToRegister(d, (d == a || d == b), true); MOV(32, R(EAX), gpr.R(a)); XOR(32, R(EDX), R(EDX)); - gpr.KillImmediate(b); + gpr.KillImmediate(b, true, false); CMP(32, gpr.R(b), Imm32(0)); // doesn't handle if OE is set, but int doesn't either... FixupBranch not_div_by_zero = J_CC(CC_NZ); @@ -887,7 +856,7 @@ void Jit64::addx(UGeckoInstruction inst) if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { gpr.Lock(a, b, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0)); gpr.UnlockAll(); } @@ -895,14 +864,14 @@ void Jit64::addx(UGeckoInstruction inst) { int operand = ((d == a) ? b : a); gpr.Lock(a, b, d); - gpr.LoadToX64(d, true); + gpr.BindToRegister(d, true); ADD(32, gpr.R(d), gpr.R(operand)); gpr.UnlockAll(); } else { gpr.Lock(a, b, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); gpr.UnlockAll(); @@ -924,7 +893,7 @@ void Jit64::addex(UGeckoInstruction inst) if ((d == a) || (d == b)) { gpr.Lock(a, b, d); - gpr.LoadToX64(d, true); + gpr.BindToRegister(d, true); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); @@ -934,7 +903,7 @@ void Jit64::addex(UGeckoInstruction inst) else { gpr.Lock(a, b, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag MOV(32, gpr.R(d), gpr.R(a)); @@ -960,7 +929,7 @@ void Jit64::addcx(UGeckoInstruction inst) { int operand = ((d == a) ? b : a); gpr.Lock(a, b, d); - gpr.LoadToX64(d, true); + gpr.BindToRegister(d, true); ADD(32, gpr.R(d), gpr.R(operand)); GenerateCarry(); gpr.UnlockAll(); @@ -968,7 +937,7 @@ void Jit64::addcx(UGeckoInstruction inst) else { gpr.Lock(a, b, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); GenerateCarry(); @@ -990,8 +959,8 @@ void Jit64::addmex(UGeckoInstruction inst) if (d == a) { - gpr.Lock(a, d); - gpr.LoadToX64(d, true); + gpr.Lock(d); + gpr.BindToRegister(d, true); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); @@ -1001,7 +970,7 @@ void Jit64::addmex(UGeckoInstruction inst) else { gpr.Lock(a, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag MOV(32, gpr.R(d), gpr.R(a)); @@ -1025,8 +994,8 @@ void Jit64::addzex(UGeckoInstruction inst) if (d == a) { - gpr.Lock(a, d); - gpr.LoadToX64(d, true); + gpr.Lock(d); + gpr.BindToRegister(d, true); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag ADC(32, gpr.R(d), Imm8(0)); @@ -1036,7 +1005,7 @@ void Jit64::addzex(UGeckoInstruction inst) else { gpr.Lock(a, d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag MOV(32, gpr.R(d), gpr.R(a)); @@ -1068,7 +1037,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst) } gpr.Lock(a, s); - gpr.LoadToX64(a, a == s); + gpr.BindToRegister(a, a == s); if (a != s) { MOV(32, gpr.R(a), gpr.R(s)); @@ -1112,18 +1081,9 @@ void Jit64::rlwimix(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA; int s = inst.RS; - if (gpr.R(a).IsImm() || gpr.R(s).IsImm()) - { - Default(inst); - return; - } - - if (a != s) - { - gpr.Lock(a, s); - gpr.LoadToX64(a, true); - } + gpr.Lock(a, s); + gpr.KillImmediate(a, true, true); u32 mask = Helper_Mask(inst.MB, inst.ME); MOV(32, R(EAX), gpr.R(s)); AND(32, gpr.R(a), Imm32(~mask)); @@ -1143,15 +1103,11 @@ void Jit64::rlwnmx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Integer) int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.R(a).IsImm()) - { - Default(inst); - return; - } - + u32 mask = Helper_Mask(inst.MB, inst.ME); gpr.FlushLockX(ECX); gpr.Lock(a, b, s); + gpr.KillImmediate(a, (a == s || a == b), true); MOV(32, R(EAX), gpr.R(s)); MOV(32, R(ECX), gpr.R(b)); AND(32, R(ECX), Imm32(0x1f)); @@ -1173,7 +1129,7 @@ void Jit64::negx(UGeckoInstruction inst) int a = inst.RA; int d = inst.RD; gpr.Lock(a, d); - gpr.LoadToX64(d, a == d, true); + gpr.BindToRegister(d, a == d, true); if (a != d) MOV(32, gpr.R(d), gpr.R(a)); NEG(32, gpr.R(d)); @@ -1193,7 +1149,7 @@ void Jit64::srwx(UGeckoInstruction inst) int s = inst.RS; gpr.FlushLockX(ECX); gpr.Lock(a, b, s); - gpr.LoadToX64(a, a == s || a == b || s == b, true); + gpr.BindToRegister(a, a == s || a == b || s == b, true); MOV(32, R(ECX), gpr.R(b)); XOR(32, R(EAX), R(EAX)); TEST(32, R(ECX), Imm32(32)); @@ -1219,7 +1175,7 @@ void Jit64::slwx(UGeckoInstruction inst) int s = inst.RS; gpr.FlushLockX(ECX); gpr.Lock(a, b, s); - gpr.LoadToX64(a, a == s || a == b || s == b, true); + gpr.BindToRegister(a, a == s || a == b || s == b, true); MOV(32, R(ECX), gpr.R(b)); XOR(32, R(EAX), R(EAX)); TEST(32, R(ECX), Imm32(32)); @@ -1244,9 +1200,9 @@ void Jit64::srawx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; int s = inst.RS; - gpr.Lock(a, s); + gpr.Lock(a, s, b); gpr.FlushLockX(ECX); - gpr.LoadToX64(a, a == s || a == b, true); + gpr.BindToRegister(a, a == s || a == b, true); MOV(32, R(ECX), gpr.R(b)); TEST(32, R(ECX), Imm32(32)); FixupBranch topBitSet = J_CC(CC_NZ); @@ -1290,7 +1246,7 @@ void Jit64::srawix(UGeckoInstruction inst) if (amount != 0) { gpr.Lock(a, s); - gpr.LoadToX64(a, a == s, true); + gpr.BindToRegister(a, a == s, true); MOV(32, R(EAX), gpr.R(s)); MOV(32, gpr.R(a), R(EAX)); SAR(32, gpr.R(a), Imm8(amount)); @@ -1311,7 +1267,7 @@ void Jit64::srawix(UGeckoInstruction inst) Default(inst); return; gpr.Lock(a, s); JitClearCA(); - gpr.LoadToX64(a, a == s, true); + gpr.BindToRegister(a, a == s, true); if (a != s) MOV(32, gpr.R(a), gpr.R(s)); gpr.UnlockAll(); @@ -1329,13 +1285,10 @@ void Jit64::cntlzwx(UGeckoInstruction inst) JITDISABLE(Integer) int a = inst.RA; int s = inst.RS; - if (gpr.R(a).IsImm() || gpr.R(s).IsImm() || s == a) - { - Default(inst); - return; - } + gpr.Lock(a, s); - gpr.LoadToX64(a, false); + gpr.KillImmediate(s, true, false); + gpr.BindToRegister(a, (a == s), true); BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s)); FixupBranch gotone = J_CC(CC_NZ); MOV(32, gpr.R(a), Imm32(63)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index e553bf5609..60a33911dd 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -44,11 +44,9 @@ void Jit64::lbzx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(b); MOV(32, R(ABI_PARAM1), gpr.R(b)); if (a) { - gpr.Lock(a); ADD(32, R(ABI_PARAM1), gpr.R(a)); } @@ -57,7 +55,7 @@ void Jit64::lbzx(UGeckoInstruction inst) MEMCHECK_START gpr.Lock(d); - gpr.LoadToX64(d, (b == d || a == d), true); + gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); MEMCHECK_END @@ -73,11 +71,9 @@ void Jit64::lhax(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(b); MOV(32, R(ABI_PARAM1), gpr.R(b)); if (a) { - gpr.Lock(a); ADD(32, R(ABI_PARAM1), gpr.R(a)); } @@ -87,7 +83,7 @@ void Jit64::lhax(UGeckoInstruction inst) MEMCHECK_START gpr.Lock(d); - gpr.LoadToX64(d, (b == d || a == d), true); + gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); MEMCHECK_END @@ -103,11 +99,9 @@ void Jit64::lwzx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(b); MOV(32, R(ABI_PARAM1), gpr.R(b)); if (a) { - gpr.Lock(a); ADD(32, R(ABI_PARAM1), gpr.R(a)); } @@ -116,7 +110,7 @@ void Jit64::lwzx(UGeckoInstruction inst) MEMCHECK_START gpr.Lock(d); - gpr.LoadToX64(d, (b == d || a == d), true); + gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); MEMCHECK_END @@ -157,10 +151,10 @@ void Jit64::lXz(UGeckoInstruction inst) // do our job at first s32 offset = (s32)(s16)inst.SIMM_16; gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(d, a); + gpr.Lock(d); MOV(32, R(ABI_PARAM1), gpr.R(a)); SafeLoadRegToEAX(ABI_PARAM1, 32, offset); - gpr.LoadToX64(d, false, true); + gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -214,8 +208,8 @@ void Jit64::lXz(UGeckoInstruction inst) { // Fast and daring gpr.Lock(a, d); - gpr.LoadToX64(a, true, false); - gpr.LoadToX64(d, a == d, true); + gpr.BindToRegister(a, true, false); + gpr.BindToRegister(d, a == d, true); MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset)); BSWAP(32, gpr.R(d).GetSimpleReg()); gpr.UnlockAll(); @@ -223,16 +217,13 @@ void Jit64::lXz(UGeckoInstruction inst) else { gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(a); - gpr.LoadToX64(a, true, false); - MOV(32, R(ABI_PARAM1), gpr.R(a)); SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); MEMCHECK_START gpr.Lock(d); - gpr.LoadToX64(d, a == d, true); + gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); MEMCHECK_END @@ -252,14 +243,13 @@ void Jit64::lha(UGeckoInstruction inst) s32 offset = (s32)(s16)inst.SIMM_16; // Safe and boring gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(a); MOV(32, R(ABI_PARAM1), gpr.R(a)); SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); MEMCHECK_START gpr.Lock(d); - gpr.LoadToX64(d, d == a, true); + gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); MEMCHECK_END @@ -280,7 +270,7 @@ void Jit64::lwzux(UGeckoInstruction inst) return; } gpr.Lock(a); - gpr.LoadToX64(a, true, true); + gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); MOV(32, R(EAX), gpr.R(a)); SafeLoadRegToEAX(EAX, 32, 0, false); @@ -288,7 +278,7 @@ void Jit64::lwzux(UGeckoInstruction inst) MEMCHECK_START gpr.Lock(d); - gpr.LoadToX64(d, b == d, true); + gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); MEMCHECK_END @@ -392,7 +382,11 @@ void Jit64::stX(UGeckoInstruction inst) MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX)); #endif if (update) + { + gpr.Lock(a); + gpr.KillImmediate(a, true, true); ADD(32, gpr.R(a), Imm32(offset)); + } gpr.UnlockAllX(); return; } @@ -403,7 +397,7 @@ void Jit64::stX(UGeckoInstruction inst) { // Fast and daring - requires 64-bit MOV(32, R(EAX), gpr.R(s)); - gpr.LoadToX64(a, true, false); + gpr.BindToRegister(a, true, false); BSWAP(32, EAX); MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX)); return; @@ -415,7 +409,7 @@ void Jit64::stX(UGeckoInstruction inst) gpr.FlushLockX(ECX, EDX); gpr.Lock(s, a); if (update && offset) - gpr.LoadToX64(a, true, true); + gpr.BindToRegister(a, true, true); MOV(32, R(EDX), gpr.R(a)); MOV(32, R(ECX), gpr.R(s)); SafeWriteRegToReg(ECX, EDX, accessSize, offset); @@ -453,7 +447,7 @@ void Jit64::stXx(UGeckoInstruction inst) gpr.FlushLockX(ECX, EDX); if (inst.SUBOP10 & 32) { - gpr.LoadToX64(a, true, true); + gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); MOV(32, R(EDX), gpr.R(a)); } else { @@ -496,7 +490,7 @@ void Jit64::lmw(UGeckoInstruction inst) { MOV(32, R(ECX), MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4)); BSWAP(32, ECX); - gpr.LoadToX64(i, false, true); + gpr.BindToRegister(i, false, true); MOV(32, gpr.R(i), R(ECX)); } gpr.UnlockAllX(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index ecd583d154..44160dc6b1 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -77,7 +77,7 @@ void Jit64::lfs(UGeckoInstruction inst) MOV(32, M(&temp32), R(EAX)); fpr.Lock(d); - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); CVTSS2SD(fpr.RX(d), M(&temp32)); MOVDDUP(fpr.RX(d), fpr.R(d)); @@ -107,8 +107,8 @@ void Jit64::lfd(UGeckoInstruction inst) gpr.Lock(a); MOV(32, R(ABI_PARAM1), gpr.R(a)); // TODO - optimize. This has to load the previous value - upper double should stay unmodified. - fpr.LoadToX64(d, true); fpr.Lock(d); + fpr.BindToRegister(d, true); X64Reg xd = fpr.RX(d); if (cpu_info.bSSSE3) { #ifdef _M_X64 @@ -184,7 +184,7 @@ void Jit64::stfd(UGeckoInstruction inst) gpr.FlushLockX(ABI_PARAM1); gpr.Lock(a); fpr.Lock(s); - gpr.LoadToX64(a, true, false); + gpr.BindToRegister(a, true, false); LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset)); TEST(32, R(ABI_PARAM1), Imm32(0x0c000000)); FixupBranch not_ram = J_CC(CC_Z); @@ -222,7 +222,7 @@ void Jit64::stfd(UGeckoInstruction inst) #endif } else { #ifdef _M_X64 - fpr.LoadToX64(s, true, false); + fpr.BindToRegister(s, true, false); MOVSD(M(&temp64), fpr.RX(s)); MEMCHECK_START @@ -233,7 +233,7 @@ void Jit64::stfd(UGeckoInstruction inst) MEMCHECK_END #else - fpr.LoadToX64(s, true, false); + fpr.BindToRegister(s, true, false); MOVSD(M(&temp64), fpr.RX(s)); MEMCHECK_START @@ -301,6 +301,7 @@ void Jit64::stfs(UGeckoInstruction inst) { MEMCHECK_START + gpr.KillImmediate(a, false, true); MOV(32, gpr.R(a), R(ABI_PARAM2)); MEMCHECK_END @@ -345,7 +346,7 @@ void Jit64::lfsx(UGeckoInstruction inst) } if (cpu_info.bSSSE3 && !js.memcheck) { fpr.Lock(inst.RS); - fpr.LoadToX64(inst.RS, false, true); + fpr.BindToRegister(inst.RS, false, true); X64Reg r = fpr.R(inst.RS).GetSimpleReg(); #ifdef _M_IX86 AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); @@ -368,7 +369,7 @@ void Jit64::lfsx(UGeckoInstruction inst) MOV(32, M(&temp32), R(EAX)); CVTSS2SD(XMM0, M(&temp32)); fpr.Lock(inst.RS); - fpr.LoadToX64(inst.RS, false, true); + fpr.BindToRegister(inst.RS, false, true); MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0)); MEMCHECK_END diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index edef0a4bf6..cfd9a6970b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -99,8 +99,8 @@ void Jit64::psq_st(UGeckoInstruction inst) gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(ECX); if (update) - gpr.LoadToX64(inst.RA, true, true); - fpr.LoadToX64(inst.RS, true); + gpr.BindToRegister(inst.RA, true, true); + fpr.BindToRegister(inst.RS, true, false); MOV(32, R(ECX), gpr.R(inst.RA)); if (offset) ADD(32, R(ECX), Imm32((u32)offset)); @@ -159,8 +159,8 @@ void Jit64::psq_l(UGeckoInstruction inst) gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(ECX); - gpr.LoadToX64(inst.RA, true, true); - fpr.LoadToX64(inst.RS, false, true); + gpr.BindToRegister(inst.RA, true, update && offset); + fpr.BindToRegister(inst.RS, false, true); if (offset) LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset)); else diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp index b350010c13..de3057f8b7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp @@ -50,7 +50,7 @@ void Jit64::ps_mr(UGeckoInstruction inst) int b = inst.FB; if (d == b) return; - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); MOVAPD(fpr.RX(d), fpr.R(b)); } @@ -72,8 +72,8 @@ void Jit64::ps_sel(UGeckoInstruction inst) fpr.FlushLockX(XMM7); fpr.FlushLockX(XMM6); fpr.Lock(a, b, c, d); - fpr.LoadToX64(a, true, false); - fpr.LoadToX64(d, false, true); + fpr.BindToRegister(a, true, false); + fpr.BindToRegister(d, false, true); // BLENDPD would have been nice... MOVAPD(XMM7, fpr.R(a)); CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111 @@ -99,12 +99,12 @@ void Jit64::ps_sign(UGeckoInstruction inst) fpr.Lock(d, b); if (d != b) { - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); MOVAPD(fpr.RX(d), fpr.R(b)); } else { - fpr.LoadToX64(d, true); + fpr.BindToRegister(d, true); } switch (inst.SUBOP10) @@ -133,6 +133,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst) int d = inst.FD; int b = inst.FB; fpr.Lock(d, b); + fpr.BindToRegister(d, (d == b), true); SQRTPD(XMM0, fpr.R(b)); MOVAPD(XMM1, M((void*)&psOneOne)); DIVPD(XMM1, R(XMM0)); @@ -161,24 +162,24 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6 if (d == a) { - fpr.LoadToX64(d, true); + fpr.BindToRegister(d, true); (this->*op)(fpr.RX(d), fpr.R(b)); } else if (d == b && reversible) { - fpr.LoadToX64(d, true); + fpr.BindToRegister(d, true); (this->*op)(fpr.RX(d), fpr.R(a)); } else if (a != d && b != d) { //sources different from d, can use rather quick solution - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); MOVAPD(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), fpr.R(b)); } else if (b != d) { - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); MOVAPD(XMM0, fpr.R(b)); MOVAPD(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), Gen::R(XMM0)); @@ -187,7 +188,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6 { MOVAPD(XMM0, fpr.R(a)); MOVAPD(XMM1, fpr.R(b)); - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); (this->*op)(XMM0, Gen::R(XMM1)); MOVAPD(fpr.RX(d), Gen::R(XMM0)); } @@ -231,7 +232,7 @@ void Jit64::ps_sum(UGeckoInstruction inst) int b = inst.FB; int c = inst.FC; fpr.Lock(a,b,c,d); - fpr.LoadToX64(d, d == a || d == b || d == c, true); + fpr.BindToRegister(d, d == a || d == b || d == c, true); switch (inst.SUBOP5) { case 10: @@ -271,7 +272,7 @@ void Jit64::ps_muls(UGeckoInstruction inst) int a = inst.FA; int c = inst.FC; fpr.Lock(a, c, d); - fpr.LoadToX64(d, d == a || d == c, true); + fpr.BindToRegister(d, d == a || d == c, true); switch (inst.SUBOP5) { case 12: @@ -329,7 +330,7 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst) default: _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); } - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); MOVAPD(fpr.RX(d), Gen::R(XMM0)); fpr.UnlockAll(); } @@ -387,7 +388,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst) //fpr.UnlockAll(); return; } - fpr.LoadToX64(d, false); + fpr.BindToRegister(d, false); MOVAPD(fpr.RX(d), Gen::R(XMM0)); ForceSinglePrecisionP(fpr.RX(d)); fpr.UnlockAll(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index bbfddd4f0b..61b9eb4474 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -72,8 +72,11 @@ void Jit64::mtspr(UGeckoInstruction inst) } // OK, this is easy. - gpr.Lock(d); - gpr.LoadToX64(d, true); + if (!gpr.R(d).IsImm()) + { + gpr.Lock(d); + gpr.BindToRegister(d, true, false); + } MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); gpr.UnlockAll(); } @@ -98,7 +101,7 @@ void Jit64::mfspr(UGeckoInstruction inst) // fall through default: gpr.Lock(d); - gpr.LoadToX64(d, false); + gpr.BindToRegister(d, false); MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); gpr.UnlockAll(); break; @@ -113,8 +116,13 @@ void Jit64::mtmsr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(SystemRegisters) - gpr.LoadToX64(inst.RS, true, false); + if (!gpr.R(inst.RS).IsImm()) + { + gpr.Lock(inst.RS); + gpr.BindToRegister(inst.RS, true, false); + } MOV(32, M(&MSR), gpr.R(inst.RS)); + gpr.UnlockAll(); gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); WriteExit(js.compilerPC + 4, 0); @@ -127,8 +135,10 @@ void Jit64::mfmsr(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(SystemRegisters) //Privileged? - gpr.LoadToX64(inst.RD, false); + gpr.Lock(inst.RD); + gpr.BindToRegister(inst.RD, false, true); MOV(32, gpr.R(inst.RD), M(&MSR)); + gpr.UnlockAll(); } void Jit64::mftb(UGeckoInstruction inst) @@ -144,7 +154,8 @@ void Jit64::mfcr(UGeckoInstruction inst) JITDISABLE(SystemRegisters) // USES_CR int d = inst.RD; - gpr.LoadToX64(d, false, true); + gpr.Lock(d); + gpr.KillImmediate(d, false, true); MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); for (int i = 1; i < 8; i++) { SHL(32, R(EAX), Imm8(4)); @@ -175,7 +186,8 @@ void Jit64::mtcrf(UGeckoInstruction inst) } else { - gpr.LoadToX64(inst.RS, true); + gpr.Lock(inst.RS); + gpr.BindToRegister(inst.RS, true, false); for (int i = 0; i < 8; i++) { if ((crm & (0x80 >> i)) != 0) @@ -186,6 +198,7 @@ void Jit64::mtcrf(UGeckoInstruction inst) MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX)); } } + gpr.UnlockAll(); } } }