From cf5088c37e5cd486b031faa6fdfc4f4ff7b28082 Mon Sep 17 00:00:00 2001 From: "dok.slade" Date: Mon, 23 Aug 2010 22:26:00 +0000 Subject: [PATCH] JIT compiler: * Improved constant folding in load/store instructions * Merged load instructions This is almost the same commit as r6076/r6077, but the x64 build has been fixed. Thanks a lot to skidau and BHaaL!! git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6120 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/ABI.cpp | 17 +- Source/Core/Common/Src/x64Emitter.h | 1 + Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 8 +- .../Core/Src/PowerPC/Jit64/Jit64_Tables.cpp | 32 +- .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 358 ++++++++---------- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 14 +- .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp | 167 ++++++-- .../Core/Src/PowerPC/JitCommon/Jit_Util.h | 3 +- 8 files changed, 338 insertions(+), 262 deletions(-) diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp index f565a77633..d18e5b1e41 100644 --- a/Source/Core/Common/Src/ABI.cpp +++ b/Source/Core/Common/Src/ABI.cpp @@ -127,12 +127,20 @@ void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2 void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) { ABI_AlignStack(2 * 4); - PUSH(32, arg1); PUSH(32, Imm32(param2)); + PUSH(32, arg1); CALL(func); ABI_RestoreStack(2 * 4); } +void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) +{ + ABI_AlignStack(1 * 4); + PUSH(32, arg1); + CALL(func); + ABI_RestoreStack(1 * 4); +} + void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { // Note: 4 * 4 = 16 bytes, so alignment is preserved. PUSH(EBP); @@ -259,6 +267,13 @@ void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2 CALL(func); } +void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) +{ + if (!arg1.IsSimpleReg(ABI_PARAM1)) + MOV(32, R(ABI_PARAM1), arg1); + CALL(func); +} + unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { return frameSize; } diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 36f5ea0b61..308c3021eb 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -600,6 +600,7 @@ public: void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3); void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3); void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2); + void ABI_CallFunctionA(void *func, const Gen::OpArg &arg1); // Pass a register as a parameter.
void ABI_CallFunctionR(void *func, Gen::X64Reg reg1); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index daaf15243b..198ee0233b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -236,8 +236,6 @@ public: void fmaddXX(UGeckoInstruction inst); void fsign(UGeckoInstruction inst); void stX(UGeckoInstruction inst); //stw sth stb - void lXz(UGeckoInstruction inst); - void lha(UGeckoInstruction inst); void rlwinmx(UGeckoInstruction inst); void rlwimix(UGeckoInstruction inst); void rlwnmx(UGeckoInstruction inst); @@ -254,12 +252,8 @@ public: void subfmex(UGeckoInstruction inst); void subfzex(UGeckoInstruction inst); - void lbzx(UGeckoInstruction inst); - void lwzx(UGeckoInstruction inst); - void lhax(UGeckoInstruction inst); + void lXXx(UGeckoInstruction inst); - void lwzux(UGeckoInstruction inst); - void stXx(UGeckoInstruction inst); void lmw(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp index ef50a62057..d55702767c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit64_Tables.cpp @@ -77,14 +77,14 @@ static GekkoOPTemplate primarytable[] = {28, &Jit64::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {29, &Jit64::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - {32, &Jit64::lXz}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {33, &Jit64::Default}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {34, &Jit64::lXz}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {35, &Jit64::Default}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {40, &Jit64::lXz}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {41, &Jit64::Default}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {42, &Jit64::lha}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {43, &Jit64::Default}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {32, &Jit64::lXXx}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {33, &Jit64::lXXx}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {34, &Jit64::lXXx}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {35, &Jit64::lXXx}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {40, &Jit64::lXXx}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {41, &Jit64::lXXx}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {42, &Jit64::lXXx}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {43, &Jit64::lXXx}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {44, &Jit64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, {45, &Jit64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, @@ -220,20 +220,20 @@ static GekkoOPTemplate table31[] = {1014, &Jit64::dcbz}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, //load word - {23, &Jit64::lwzx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {55, &Jit64::lwzux}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + {23, &Jit64::lXXx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {55, &Jit64::lXXx}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load halfword - {279, &Jit64::Default}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {311, &Jit64::Default}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + {279, &Jit64::lXXx}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {311, &Jit64::lXXx}, //"lhzux", 
OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load halfword signextend - {343, &Jit64::lhax}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {375, &Jit64::Default}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + {343, &Jit64::lXXx}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {375, &Jit64::lXXx}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load byte - {87, &Jit64::lbzx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {119, &Jit64::Default}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + {87, &Jit64::lXXx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {119, &Jit64::lXXx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load byte reverse {534, &Jit64::Default}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index c007343583..4dd676e57a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -34,96 +34,85 @@ #include "JitAsm.h" #include "JitRegCache.h" -void Jit64::lbzx(UGeckoInstruction inst) +void Jit64::lXXx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(LoadStore) - if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff) + int a = inst.RA, b = inst.RB, d = inst.RD; + + // Skip disabled JIT instructions + if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff && (inst.OPCD == 31) && (inst.SUBOP10 == 87)) + { Default(inst); return; } + if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff && ((inst.OPCD == 34) || (inst.OPCD == 40) || (inst.OPCD == 32))) + { Default(inst); return; } + if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff && (inst.OPCD == 32)) { Default(inst); return; } - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(b)); - if (a) + // Determine memory access size and sign extend + int accessSize; + bool signExtend; + switch (inst.OPCD) { - ADD(32, R(ABI_PARAM1), gpr.R(a)); + case 32: /* lwz */ + case 33: /* lwzu */ + accessSize = 32; + signExtend = false; + break; + + case 34: /* lbz */ + case 35: /* lbzu */ + accessSize = 8; + signExtend = false; + break; + + case 40: /* lhz */ + case 41: /* lhzu */ + accessSize = 16; + signExtend = false; + break; + + case 42: /* lha */ + case 43: /* lhau */ + accessSize = 16; + signExtend = true; + break; + + case 31: + switch (inst.SUBOP10) + { + case 23: /* lwzx */ + case 55: /* lwzux */ + accessSize = 32; + signExtend = false; + break; + + case 87: /* lbzx */ + case 119: /* lbzux */ + accessSize = 8; + signExtend = false; + break; + case 279: /* lhzx */ + case 311: /* lhzux */ + accessSize = 16; + signExtend = false; + break; + + case 343: /* lhax */ + case 375: /* lhaux */ + accessSize = 16; + signExtend = true; + break; + + default: + PanicAlert("Invalid instruction"); + } + break; + + default: + PanicAlert("Invalid instruction"); } - SafeLoadRegToEAX(ABI_PARAM1, 8, 0); - - MEMCHECK_START - - gpr.KillImmediate(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - - MEMCHECK_END - - gpr.UnlockAllX(); -} - -void Jit64::lhax(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(b)); - if (a) - { - ADD(32, R(ABI_PARAM1), gpr.R(a)); - } - - // Some homebrew actually loads from a hw reg with this instruction - 
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true); - - MEMCHECK_START - - gpr.KillImmediate(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - - MEMCHECK_END - - gpr.UnlockAllX(); -} - -void Jit64::lwzx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(b)); - if (a) - { - ADD(32, R(ABI_PARAM1), gpr.R(a)); - } - - SafeLoadRegToEAX(ABI_PARAM1, 32, 0); - - MEMCHECK_START - - gpr.KillImmediate(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - - MEMCHECK_END - - gpr.UnlockAllX(); -} - -void Jit64::lXz(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff) - { Default(inst); return; } - - int d = inst.RD; - int a = inst.RA; - // TODO(ector): Make it dynamically enable/disable idle skipping where appropriate // Will give nice boost to dual core mode // (mb2): I agree, @@ -144,20 +133,17 @@ void Jit64::lXz(UGeckoInstruction inst) // do our job at first s32 offset = (s32)(s16)inst.SIMM_16; - gpr.FlushLockX(ABI_PARAM1); gpr.Lock(d); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - SafeLoadRegToEAX(ABI_PARAM1, 32, offset); + SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend); gpr.KillImmediate(d, false, true); MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); - gpr.UnlockAllX(); - + gpr.Flush(FLUSH_ALL); // if it's still 0, we can wait until the next event - CMP(32, R(RAX), Imm32(0)); - FixupBranch noIdle = J_CC(CC_NE); + TEST(32, R(EAX), R(EAX)); + FixupBranch noIdle = J_CC(CC_NZ); gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); @@ -172,110 +158,81 @@ void Jit64::lXz(UGeckoInstruction inst) //js.compilerPC += 8; return; } - - // R2 always points to the small read-only data area. We could bake R2-relative loads into immediates. - // R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode. 
- - s32 offset = (s32)(s16)inst.SIMM_16; - if (!a) + + // Determine whether this instruction updates inst.RA + bool update; + if (inst.OPCD == 31) + update = ((inst.SUBOP10 & 0x20) != 0); + else + update = ((inst.OPCD & 1) != 0); + + // Prepare address operand + Gen::OpArg opAddress; + if (!update && !a) { - Default(inst); - return; + if (inst.OPCD == 31) + { + gpr.Lock(b); + opAddress = gpr.R(b); + } + else + { + opAddress = Imm32((u32)(s32)inst.SIMM_16); + } } - - int accessSize; - switch (inst.OPCD) + else if (update && ((a == 0) || (d == a))) { - case 32: - accessSize = 32; - if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;} - break; //lwz - case 40: accessSize = 16; break; //lhz - case 34: accessSize = 8; break; //lbz - default: - //_assert_msg_(DYNA_REC, 0, "lXz: invalid access size"); - PanicAlert("lXz: invalid access size"); - return; - } - - if (accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU) - { - // Fast and daring - gpr.Lock(a, d); - gpr.BindToRegister(a, true, false); - gpr.BindToRegister(d, a == d, true); - MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset)); - BSWAP(32, gpr.R(d).GetSimpleReg()); - gpr.UnlockAll(); + PanicAlert("Invalid instruction"); } else { - gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(a); - gpr.BindToRegister(a, true, false); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); - - MEMCHECK_START - - gpr.KillImmediate(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - - MEMCHECK_END - - gpr.UnlockAll(); - gpr.UnlockAllX(); + if ((inst.OPCD != 31) && gpr.R(a).IsImm()) + { + opAddress = Imm32((u32)gpr.R(a).offset + (s32)inst.SIMM_16); + } + else if ((inst.OPCD == 31) && gpr.R(a).IsImm() && gpr.R(b).IsImm()) + { + opAddress = Imm32((u32)gpr.R(a).offset + (u32)gpr.R(b).offset); + } + else + { + gpr.FlushLockX(ABI_PARAM1); + opAddress = R(ABI_PARAM1); + MOV(32, opAddress, gpr.R(a)); + + if (inst.OPCD == 31) + ADD(32, opAddress, gpr.R(b)); + else + ADD(32, opAddress, Imm32((u32)(s32)inst.SIMM_16)); + } } -} -void Jit64::lha(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) + SafeLoadToEAX(opAddress, accessSize, 0, signExtend); - int d = inst.RD; - int a = inst.RA; - s32 offset = (s32)(s16)inst.SIMM_16; - // Safe and boring - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); - - MEMCHECK_START - - gpr.KillImmediate(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - - MEMCHECK_END - - gpr.UnlockAllX(); -} - -void Jit64::lwzux(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(LoadStore) - - int a = inst.RA, b = inst.RB, d = inst.RD; - if (!a || a == d || a == b) + // We must flush immediate values from the following registers because + // they may change at runtime if no MMU exception has been raised + gpr.KillImmediate(d, true, true); + if (update) { - Default(inst); - return; + gpr.Lock(a); + gpr.BindToRegister(a, true, true); } - gpr.Lock(a); - gpr.BindToRegister(a, true, true); - ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(EAX), gpr.R(a)); - SafeLoadRegToEAX(EAX, 32, 0, false); - + MEMCHECK_START - gpr.KillImmediate(d, false, true); + if (update) + { + if (inst.OPCD == 31) + ADD(32, gpr.R(a), gpr.R(b)); + else + ADD(32, gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); + } MOV(32, gpr.R(d), R(EAX)); MEMCHECK_END - + gpr.UnlockAll(); + gpr.UnlockAllX(); } // Zero cache line. 
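The opAddress selection above is where the improved constant folding happens: when RA (and RB, for indexed forms) are already known immediates in the register cache, the effective address collapses to a single Imm32 and no MOV/ADD pair is emitted at all. A minimal standalone sketch of that decision follows, with a hypothetical GuestReg struct standing in for the register cache's OpArg; the names here are illustrative, not Dolphin's API.

#include <cstdint>
#include <optional>

// Hypothetical stand-in for the register cache: a guest GPR whose value may
// already be known as a folded constant at JIT time (gpr.R(x).IsImm() above).
struct GuestReg {
    bool isImm;      // true if the register currently holds a folded constant
    uint32_t value;  // the constant, valid only when isImm is true
};

// Models the opAddress selection in Jit64::lXXx: return a compile-time
// constant when every component of the effective address is constant,
// otherwise signal (nullopt) that a runtime MOV/ADD sequence is needed.
std::optional<uint32_t> FoldLoadAddress(const GuestReg* ra, // nullptr when RA == 0
                                        const GuestReg* rb, // nullptr for D-form (displacement) loads
                                        int32_t simm16)     // sign-extended displacement for D-forms
{
    if (ra == nullptr) {
        // RA == 0 means the base is literally zero (PowerPC addressing rule).
        if (rb == nullptr)
            return (uint32_t)simm16;            // D-form with RA == 0: pure immediate
        if (rb->isImm)
            return rb->value;                   // X-form with RA == 0 and constant RB
        return std::nullopt;                    // RB only known at runtime
    }
    if (!ra->isImm)
        return std::nullopt;                    // unknown base: emit the ADD at runtime
    if (rb == nullptr)
        return (uint32_t)(ra->value + simm16);  // D-form: RA + SIMM folded
    if (rb->isImm)
        return ra->value + rb->value;           // X-form: RA + RB both constant
    return std::nullopt;
}

When the fold succeeds, SafeLoadToEAX can in turn classify the constant address against mem_mask at compile time, which removes both the runtime TEST and the branch.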
@@ -312,7 +269,7 @@ void Jit64::stX(UGeckoInstruction inst) bool update = inst.OPCD & 1; s32 offset = (s32)(s16)inst.SIMM_16; - if (a || update) + if (a || !update) { int accessSize; switch (inst.OPCD & ~1) @@ -323,18 +280,18 @@ void Jit64::stX(UGeckoInstruction inst) default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; } - if (gpr.R(a).IsImm()) + if ((a == 0) || gpr.R(a).IsImm()) { // If we already know the address through constant folding, we can do some // fun tricks... - u32 addr = (u32)gpr.R(a).offset; + u32 addr = ((a == 0) ? 0 : (u32)gpr.R(a).offset); addr += offset; if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) { - if (offset && update) - gpr.SetImmediate32(a, addr); gpr.FlushLockX(ABI_PARAM1); MOV(32, R(ABI_PARAM1), gpr.R(s)); + if (update) + gpr.SetImmediate32(a, addr); switch (accessSize) { // No need to protect these, they don't touch any state @@ -347,16 +304,27 @@ void Jit64::stX(UGeckoInstruction inst) gpr.UnlockAllX(); return; } - else if (Memory::IsRAMAddress(addr) && accessSize == 32) + else if (Memory::IsRAMAddress(addr)) { - if (offset && update) - gpr.SetImmediate32(a, addr); - MOV(accessSize, R(EAX), gpr.R(s)); + MOV(32, R(EAX), gpr.R(s)); BSWAP(accessSize, EAX); WriteToConstRamAddress(accessSize, R(EAX), addr); + if (update) + gpr.SetImmediate32(a, addr); + return; + } + else + { + switch (accessSize) + { + case 32: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), gpr.R(s), addr); break; + case 16: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), gpr.R(s), addr); break; + case 8: ABI_CallFunctionAC(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), gpr.R(s), addr); break; + } + if (update) + gpr.SetImmediate32(a, addr); return; } - // Other IO not worth the trouble. } // Optimized stack access? @@ -368,11 +336,11 @@ void Jit64::stX(UGeckoInstruction inst) BSWAP(32, EAX); #ifdef _M_X64 MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX)); -#elif _M_IX86 +#else AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX)); #endif - if (update) + if (update && offset) { gpr.Lock(a); gpr.KillImmediate(a, true, true); @@ -406,9 +374,9 @@ void Jit64::stX(UGeckoInstruction inst) if (update && offset) { + gpr.KillImmediate(a, true, true); MEMCHECK_START - gpr.KillImmediate(a, true, true); ADD(32, gpr.R(a), Imm32((u32)offset)); MEMCHECK_END @@ -419,7 +387,7 @@ void Jit64::stX(UGeckoInstruction inst) } else { - Default(inst); + PanicAlert("Invalid stX"); } } @@ -470,9 +438,7 @@ void Jit64::stXx(UGeckoInstruction inst) // A few games use these heavily in video codecs. 
void Jit64::lmw(UGeckoInstruction inst) { -#ifdef _M_IX86 - Default(inst); return; -#else +#ifdef _M_X64 gpr.FlushLockX(ECX); MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16)); if (inst.RA) @@ -485,14 +451,14 @@ void Jit64::lmw(UGeckoInstruction inst) MOV(32, gpr.R(i), R(ECX)); } gpr.UnlockAllX(); +#else + Default(inst); return; #endif } void Jit64::stmw(UGeckoInstruction inst) { -#ifdef _M_IX86 - Default(inst); return; -#else +#ifdef _M_X64 gpr.FlushLockX(ECX); MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16)); if (inst.RA) @@ -504,6 +470,8 @@ void Jit64::stmw(UGeckoInstruction inst) MOV(32, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4), R(ECX)); } gpr.UnlockAllX(); +#else + Default(inst); return; #endif } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 44160dc6b1..55990d6abf 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -62,15 +62,13 @@ void Jit64::lfs(UGeckoInstruction inst) return; } s32 offset = (s32)(s16)inst.SIMM_16; - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(a)); if (jo.assumeFPLoadFromMem) { - UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false); + UnsafeLoadToEAX(gpr.R(a), 32, offset, false); } else { - SafeLoadRegToEAX(ABI_PARAM1, 32, offset); + SafeLoadToEAX(gpr.R(a), 32, offset, false); } MEMCHECK_START @@ -83,7 +81,6 @@ void Jit64::lfs(UGeckoInstruction inst) MEMCHECK_END - gpr.UnlockAllX(); fpr.UnlockAll(); } @@ -299,9 +296,12 @@ void Jit64::stfs(UGeckoInstruction inst) ADD(32, R(ABI_PARAM2), Imm32(offset)); if (update && offset) { + // We must flush immediate values from the following register because + // it may change at runtime if no MMU exception has been raised + gpr.KillImmediate(a, true, true); + MEMCHECK_START - gpr.KillImmediate(a, false, true); MOV(32, gpr.R(a), R(ABI_PARAM2)); MEMCHECK_END @@ -362,7 +362,7 @@ void Jit64::lfsx(UGeckoInstruction inst) MEMCHECK_END } else { - SafeLoadRegToEAX(EAX, 32, false); + SafeLoadToEAX(R(EAX), 32, 0, false); MEMCHECK_START diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index b01cc4c517..42641c1e3d 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -36,11 +36,11 @@ static u32 GC_ALIGNED16(float_buffer); void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) { -#ifdef _M_IX86 +#ifdef _M_X64 + MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); +#else AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset)); -#else - MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); #endif if (accessSize == 32) { @@ -63,52 +63,149 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset) { -#ifdef _M_IX86 +#ifdef _M_X64 + MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); +#else AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset)); -#else - MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); #endif } -void EmuCodeBlock::SafeLoadRegToEAX(X64Reg 
reg_addr, int accessSize, s32 offset, bool signExtend) +void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend) { - if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU) +#ifdef _M_X64 + if (opAddress.IsSimpleReg()) { - // FIXME: accessSize == 16 does not work. Breaks mkdd - UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend); + MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset)); + } + else if (opAddress.IsImm() && (((u32)opAddress.offset + offset) < 0x80000000)) // MDisp can only be used with s32 offsets + { + MOVZX(32, accessSize, EAX, MDisp(RBX, (u32)opAddress.offset + offset)); } else { - if (offset) - ADD(32, R(reg_addr), Imm32((u32)offset)); + MOV(32, R(EAX), opAddress); + MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset)); + } +#else + if (opAddress.IsImm()) + { + MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK))); + } + else + { + if (!opAddress.IsSimpleReg(EAX)) + MOV(32, R(EAX), opAddress); + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset)); + } +#endif + + if (accessSize == 32) + { + BSWAP(32, EAX); + } + else if (accessSize == 16) + { + BSWAP(32, EAX); + if (signExtend) + SAR(32, R(EAX), Imm8(16)); + else + SHR(32, R(EAX), Imm8(16)); + } + else if (signExtend) + { + // TODO: bake 8-bit into the original load. + MOVSX(32, accessSize, EAX, R(EAX)); + } +} +void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend) +{ + if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32) && !Core::g_CoreStartupParameter.bMMU) + { + // BackPatch only supports 32-bit accesses + UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + } + else + { u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; - if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack) { mem_mask |= Memory::ADDR_MASK_MEM1; } - - TEST(32, R(reg_addr), Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z); - - switch (accessSize) + + if (opAddress.IsImm()) { - case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg_addr); break; - case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg_addr); break; - case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg_addr); break; + u32 address = (u32)opAddress.offset + offset; + if ((address & mem_mask) == 0) + { + UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + } + else + { + switch (accessSize) + { + case 32: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), address); break; + case 16: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), address); break; + case 8: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), address); break; + } + if (signExtend && accessSize < 32) + { + // Need to sign extend values coming from the Read_U* functions. + MOVSX(32, accessSize, EAX, R(EAX)); + } + } } - if (signExtend && accessSize < 32) + else { - // Need to sign extend values coming from the Read_U* functions. 
- MOVSX(32, accessSize, EAX, R(EAX)); - } + if (offset) + { + MOV(32, R(EAX), opAddress); + ADD(32, R(EAX), Imm32(offset)); + TEST(32, R(EAX), Imm32(mem_mask)); + FixupBranch fast = J_CC(CC_Z); - FixupBranch exit = J(); - SetJumpTarget(fast); - UnsafeLoadRegToReg(reg_addr, EAX, accessSize, 0, signExtend); - SetJumpTarget(exit); + switch (accessSize) + { + case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), EAX); break; + case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), EAX); break; + case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), EAX); break; + } + if (signExtend && accessSize < 32) + { + // Need to sign extend values coming from the Read_U* functions. + MOVSX(32, accessSize, EAX, R(EAX)); + } + + FixupBranch exit = J(); + SetJumpTarget(fast); + UnsafeLoadToEAX(R(EAX), accessSize, 0, signExtend); + SetJumpTarget(exit); + } + else + { + TEST(32, opAddress, Imm32(mem_mask)); + FixupBranch fast = J_CC(CC_Z); + + switch (accessSize) + { + case 32: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), opAddress); break; + case 16: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), opAddress); break; + case 8: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), opAddress); break; + } + if (signExtend && accessSize < 32) + { + // Need to sign extend values coming from the Read_U* functions. + MOVSX(32, accessSize, EAX, R(EAX)); + } + + FixupBranch exit = J(); + SetJumpTarget(fast); + UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + SetJumpTarget(exit); + } + } } } @@ -118,11 +215,11 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); } if (swap) BSWAP(accessSize, reg_value); -#ifdef _M_IX86 +#ifdef _M_X64 + MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); +#else AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value)); -#else - MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); #endif } @@ -174,11 +271,11 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr) FixupBranch arg2 = J(); SetJumpTarget(argh); PSHUFB(xmm_value, M((void *)pbswapShuffle1x4)); -#ifdef _M_IX86 +#ifdef _M_X64 + MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value); +#else AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value); -#else - MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value); #endif SetJumpTarget(arg2); } else { diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h index 99ee795822..70d4e294e4 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h @@ -27,7 +27,8 @@ public: void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset); void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true); - void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); + void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); + void 
SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true); // Trashes both inputs and EAX.
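The net effect of the new SafeLoadToEAX is a three-way split: constant addresses are classified against mem_mask at compile time, fast-path addresses load straight through the flat memory view, and everything else calls the Memory::Read_U* thunks. A hedged C++ model of the emitted logic, using stand-in names (SlowRead, mem_view, view_mask) rather than Dolphin's real Memory:: interface:

#include <cstdint>

// Assumed stand-in for the thunked Memory::Read_U8_ZX/Read_U16_ZX/Read_U32
// calls taken on the slow path (MMIO, MMU, etc.); stubbed here so the
// sketch is self-contained.
static uint32_t SlowRead(uint32_t addr, int accessSize)
{
    (void)addr; (void)accessSize;
    return 0; // the real JIT dispatches to a read handler here
}

// Plain-C++ model of what SafeLoadToEAX emits: test the address against
// mem_mask, fall back to the read handler on a hit, otherwise load directly
// through the flat memory view (MOVZX + BSWAP), then sign-extend on request
// (the SAR/MOVSX tail in UnsafeLoadToEAX).
uint32_t SafeLoad(const uint8_t* mem_view, uint32_t view_mask, uint32_t mem_mask,
                  uint32_t addr, int accessSize, bool signExtend)
{
    uint32_t val;
    if (addr & mem_mask) {
        val = SlowRead(addr, accessSize);          // slow path: handler call
    } else {
        const uint8_t* p = mem_view + (addr & view_mask);
        val = 0;
        for (int i = 0; i < accessSize / 8; i++)
            val = (val << 8) | p[i];               // big-endian guest load (the BSWAP)
    }
    if (signExtend && accessSize < 32) {
        int shift = 32 - accessSize;
        val = (uint32_t)((int32_t)(val << shift) >> shift); // MOVSX/SAR equivalent
    }
    return val;
}

The real emitter additionally special-cases a nonzero offset by materializing the sum in EAX before the TEST, but the observable result matches this model.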