diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index a1ef3c409c..ba5975ab24 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -598,6 +598,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc js.isLastInstruction = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; + js.mustCheckFifo = false; js.curBlock = b; js.numLoadStoreInst = 0; js.numFloatingPointInst = 0; @@ -723,9 +724,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end(); // Gather pipe writes using an immediate address are explicitly tracked. - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo)) { - js.fifoBytesThisBlock -= 32; + if (js.fifoBytesThisBlock >= 32) + js.fifoBytesThisBlock -= 32; + js.mustCheckFifo = false; BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunction((void*)&GPFifo::FastCheckGatherPipe); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index b00b63845b..4fce2e91bf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -241,4 +241,6 @@ public: void stmw(UGeckoInstruction inst); void dcbx(UGeckoInstruction inst); + + void eieio(UGeckoInstruction inst); }; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 75df5bf07e..700e87c3ae 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -309,7 +309,7 @@ static GekkoOPTemplate table31[] = { // Unused instructions on GC {310, &Jit64::FallBackToInterpreter}, // eciwx {438, &Jit64::FallBackToInterpreter}, // ecowx - {854, &Jit64::DoNothing}, // eieio + {854, &Jit64::eieio}, // eieio {306, &Jit64::FallBackToInterpreter}, // tlbie {566, &Jit64::DoNothing}, // tlbsync }; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index a67dad0274..2059587dd6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -598,3 +598,15 @@ void Jit64::stmw(UGeckoInstruction inst) } gpr.UnlockAllX(); } + +void Jit64::eieio(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStoreOff); + + // optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block, + // which is generally safe. However postponing FIFO writes across eieio instructions + // is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection). + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) + js.mustCheckFifo = true; +} diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index d615248083..e19343393a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -405,6 +405,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB js.assumeNoPairedQuantize = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; + js.mustCheckFifo = false; js.downcountAmount = 0; js.skipInstructions = 0; js.curBlock = b; @@ -491,9 +492,11 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB bool gatherPipeIntCheck = jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end(); - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo)) { - js.fifoBytesThisBlock -= 32; + if (js.fifoBytesThisBlock >= 32) + js.fifoBytesThisBlock -= 32; + js.mustCheckFifo = false; gpr.Lock(W30); BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 00181d916d..c2832ff460 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -116,6 +116,7 @@ public: void dcbx(UGeckoInstruction inst); void dcbt(UGeckoInstruction inst); void dcbz(UGeckoInstruction inst); + void eieio(UGeckoInstruction inst); // LoadStore floating point void lfXX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 1876ff454e..34a8638024 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -853,3 +853,15 @@ void JitArm64::dcbz(UGeckoInstruction inst) gpr.Unlock(W0); } + +void JitArm64::eieio(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStoreOff); + + // optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block, + // which is generally safe. However postponing FIFO writes across eieio instructions + // is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection). + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) + js.mustCheckFifo = true; +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 18d05b4abf..723cf91aa6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -314,7 +314,7 @@ static GekkoOPTemplate table31[] = { // Unused instructions on GC {310, &JitArm64::FallBackToInterpreter}, // eciwx {438, &JitArm64::FallBackToInterpreter}, // ecowx - {854, &JitArm64::DoNothing}, // eieio + {854, &JitArm64::eieio}, // eieio {306, &JitArm64::FallBackToInterpreter}, // tlbie {566, &JitArm64::DoNothing}, // tlbsync }; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 7b96a6b739..489bb5ba38 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -103,6 +103,7 @@ protected: bool generatingTrampoline = false; u8* trampolineExceptionHandler; + bool mustCheckFifo; int fifoBytesThisBlock; PPCAnalyst::BlockStats st;