Added an external exception check when the CPU writes to the FIFO. This gives the CPU time to service FIFO overflows. Fixes random hangs caused by FIFO overflows and desyncs like in "The Last Story" and "Battalion Wars 2". Thanks to marcosvitali for the research.

Fixes issue 5209.
Fixes issue 5150.
Fixes issue 5055.
Fixes issue 4889.
Fixes issue 4061.
Fixes issue 4010.
Fixes issue 3902.
This commit is contained in:
skidau
2012-03-02 18:53:41 +11:00
parent ee09def802
commit 9e398fd418
14 changed files with 75 additions and 42 deletions

View File

@ -19,9 +19,9 @@
#include "ChunkFile.h" #include "ChunkFile.h"
#include "ProcessorInterface.h" #include "ProcessorInterface.h"
#include "Memmap.h" #include "Memmap.h"
#include "../PowerPC/PowerPC.h"
#include "VideoBackendBase.h" #include "VideoBackendBase.h"
#include "../PowerPC/JitCommon/JitBase.h"
#include "../PowerPC/PowerPC.h"
#include "GPFifo.h" #include "GPFifo.h"
@ -96,6 +96,16 @@ void STACKALIGN CheckGatherPipe()
// move back the spill bytes // move back the spill bytes
memmove(m_gatherPipe, m_gatherPipe + cnt, m_gatherPipeCount); memmove(m_gatherPipe, m_gatherPipe + cnt, m_gatherPipeCount);
// Profile where the FIFO writes are occurring.
const u32 addr = PC - 4;
if (jit && (jit->js.fifoWriteAddresses.find(addr)) == (jit->js.fifoWriteAddresses.end()))
{
jit->js.fifoWriteAddresses.insert(addr);
// Invalidate the JIT block so that it gets recompiled with the external exception check included.
jit->GetBlockCache()->InvalidateICache(addr, 8);
}
} }
} }

View File

@ -367,7 +367,7 @@ void Interpreter::dcbf(UGeckoInstruction _inst)
if (jit) if (jit)
{ {
u32 address = Helper_Get_EA_X(_inst); u32 address = Helper_Get_EA_X(_inst);
jit->GetBlockCache()->InvalidateICache(address & ~0x1f); jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32);
} }
} }
@ -378,7 +378,7 @@ void Interpreter::dcbi(UGeckoInstruction _inst)
if (jit) if (jit)
{ {
u32 address = Helper_Get_EA_X(_inst); u32 address = Helper_Get_EA_X(_inst);
jit->GetBlockCache()->InvalidateICache(address & ~0x1f); jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32);
} }
} }

View File

@ -41,6 +41,7 @@
#include "JitAsm.h" #include "JitAsm.h"
#include "JitRegCache.h" #include "JitRegCache.h"
#include "Jit64_Tables.h" #include "Jit64_Tables.h"
#include "HW/ProcessorInterface.h"
using namespace Gen; using namespace Gen;
using namespace PowerPC; using namespace PowerPC;
@ -569,6 +570,24 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
SetJumpTarget(b1); SetJumpTarget(b1);
} }
// Add an external exception check if the instruction writes to the FIFO.
if (jit->js.fifoWriteAddresses.find(js.compilerPC) != jit->js.fifoWriteAddresses.end())
{
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch noExtException = J_CC(CC_Z);
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
FixupBranch noCPInt = J_CC(CC_Z);
MOV(32, M(&PC), Imm32(js.compilerPC));
WriteExceptionExit();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtException);
}
Jit64Tables::CompileInstruction(ops[i]); Jit64Tables::CompileInstruction(ops[i]);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) if (js.memcheck && (opinfo->flags & FL_LOADSTORE))

View File

@ -1260,8 +1260,8 @@ static const std::string opcodeNames[] = {
"FResult_End", "StorePaired", "StoreSingle", "StoreDouble", "StoreFReg", "FResult_End", "StorePaired", "StoreSingle", "StoreDouble", "StoreFReg",
"FDCmpCR", "CInt16", "CInt32", "SystemCall", "RFIExit", "FDCmpCR", "CInt16", "CInt32", "SystemCall", "RFIExit",
"InterpreterBranch", "IdleBranch", "ShortIdleLoop", "InterpreterBranch", "IdleBranch", "ShortIdleLoop",
"FPExceptionCheckStart", "FPExceptionCheckEnd", "ISIException", "Tramp", "FPExceptionCheckStart", "FPExceptionCheckEnd", "ISIException", "ExtExceptionCheck",
"BlockStart", "BlockEnd", "Int3", "Tramp", "BlockStart", "BlockEnd", "Int3",
}; };
static const unsigned alwaysUsedList[] = { static const unsigned alwaysUsedList[] = {
InterpreterFallback, StoreGReg, StoreCR, StoreLink, StoreCTR, StoreMSR, InterpreterFallback, StoreGReg, StoreCR, StoreLink, StoreCTR, StoreMSR,
@ -1269,7 +1269,7 @@ static const unsigned alwaysUsedList[] = {
Store16, Store32, StoreSingle, StoreDouble, StorePaired, StoreFReg, FDCmpCR, Store16, Store32, StoreSingle, StoreDouble, StorePaired, StoreFReg, FDCmpCR,
BlockStart, BlockEnd, IdleBranch, BranchCond, BranchUncond, ShortIdleLoop, BlockStart, BlockEnd, IdleBranch, BranchCond, BranchUncond, ShortIdleLoop,
SystemCall, InterpreterBranch, RFIExit, FPExceptionCheckStart, SystemCall, InterpreterBranch, RFIExit, FPExceptionCheckStart,
FPExceptionCheckEnd, ISIException, Int3, Tramp, Nop FPExceptionCheckEnd, ISIException, ExtExceptionCheck, Int3, Tramp, Nop
}; };
static const unsigned extra8RegList[] = { static const unsigned extra8RegList[] = {
LoadGReg, LoadCR, LoadGQR, LoadFReg, LoadFRegDENToZero, LoadGReg, LoadCR, LoadGQR, LoadFReg, LoadFRegDENToZero,

View File

@ -165,10 +165,10 @@ enum Opcode {
ShortIdleLoop, // Idle loop seen in homebrew like wii mahjong, ShortIdleLoop, // Idle loop seen in homebrew like wii mahjong,
// just a branch // just a branch
// used for MMU, at least until someone // used for exception checking, at least until someone
// has a better idea of integrating it // has a better idea of integrating it
FPExceptionCheckStart, FPExceptionCheckEnd, FPExceptionCheckStart, FPExceptionCheckEnd,
ISIException, ISIException,ExtExceptionCheck,
// "Opcode" representing a register too far away to // "Opcode" representing a register too far away to
// reference directly; this is a size optimization // reference directly; this is a size optimization
Tramp, Tramp,
@ -411,6 +411,9 @@ public:
InstLoc EmitISIException(InstLoc dest) { InstLoc EmitISIException(InstLoc dest) {
return EmitUOp(ISIException, dest); return EmitUOp(ISIException, dest);
} }
// Appends an ExtExceptionCheck micro-op to the IR, with `pc` as its only operand.
// The JitIL front end passes an integer constant holding the address of the
// instruction being compiled; the code generator reads that immediate back and
// writes it to PC before taking the exception exit, which happens only when both
// EXCEPTION_EXTERNAL_INT and the INT_CAUSE_CP interrupt cause are set.
// Returns the InstLoc produced by EmitUOp for the new op.
InstLoc EmitExtExceptionCheck(InstLoc pc) {
return EmitUOp(ExtExceptionCheck, pc);
}
InstLoc EmitRFIExit() { InstLoc EmitRFIExit() {
return FoldZeroOp(RFIExit, 0); return FoldZeroOp(RFIExit, 0);
} }

View File

@ -50,6 +50,7 @@ The register allocation is linear scan allocation.
#include "../../../../Common/Src/CPUDetect.h" #include "../../../../Common/Src/CPUDetect.h"
#include "MathUtil.h" #include "MathUtil.h"
#include "../../Core.h" #include "../../Core.h"
#include "HW/ProcessorInterface.h"
static ThunkManager thunks; static ThunkManager thunks;
@ -761,6 +762,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
case FPExceptionCheckStart: case FPExceptionCheckStart:
case FPExceptionCheckEnd: case FPExceptionCheckEnd:
case ISIException: case ISIException:
case ExtExceptionCheck:
case Int3: case Int3:
case Tramp: case Tramp:
// No liveness effects // No liveness effects
@ -1920,6 +1922,21 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
break; break;
} }
case ExtExceptionCheck: {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch noExtException = Jit->J_CC(CC_Z);
Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
FixupBranch noCPInt = Jit->J_CC(CC_Z);
Jit->MOV(32, M(&PC), Imm32(InstLoc));
Jit->WriteExceptionExit();
Jit->SetJumpTarget(noCPInt);
Jit->SetJumpTarget(noExtException);
break;
}
case Int3: { case Int3: {
Jit->INT3(); Jit->INT3();
break; break;

View File

@ -649,6 +649,11 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
ibuild.EmitFPExceptionCheckStart(ibuild.EmitIntConst(ops[i].address)); ibuild.EmitFPExceptionCheckStart(ibuild.EmitIntConst(ops[i].address));
} }
if (jit->js.fifoWriteAddresses.find(js.compilerPC) != jit->js.fifoWriteAddresses.end())
{
ibuild.EmitExtExceptionCheck(ibuild.EmitIntConst(ops[i].address));
}
JitILTables::CompileInstruction(ops[i]); JitILTables::CompileInstruction(ops[i]);
if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) if (js.memcheck && (opinfo->flags & FL_LOADSTORE))

View File

@ -31,6 +31,8 @@
#include "PowerPCDisasm.h" #include "PowerPCDisasm.h"
#include "disasm.h" #include "disasm.h"
#include <set>
#define JIT_OPCODE 0 #define JIT_OPCODE 0
class JitBase : public CPUCoreBase, public EmuCodeBlock class JitBase : public CPUCoreBase, public EmuCodeBlock
@ -75,6 +77,8 @@ protected:
u8* rewriteStart; u8* rewriteStart;
JitBlock *curBlock; JitBlock *curBlock;
std::set<u32> fifoWriteAddresses;
}; };
public: public:

View File

@ -390,13 +390,12 @@ bool JitBlock::ContainsAddress(u32 em_address)
} }
void JitBlockCache::InvalidateICache(u32 address) void JitBlockCache::InvalidateICache(u32 address, const u32 length)
{ {
address &= ~0x1f;
// destroy JIT blocks // destroy JIT blocks
// !! this works correctly under assumption that any two overlapping blocks end at the same address // !! this works correctly under assumption that any two overlapping blocks end at the same address
std::map<pair<u32,u32>, u32>::iterator it1 = block_map.lower_bound(std::make_pair(address, 0)), it2 = it1, it; std::map<pair<u32,u32>, u32>::iterator it1 = block_map.lower_bound(std::make_pair(address, 0)), it2 = it1, it;
while (it2 != block_map.end() && it2->first.second < address + 0x20) while (it2 != block_map.end() && it2->first.second < address + length)
{ {
DestroyBlock(it2->second, true); DestroyBlock(it2->second, true);
it2++; it2++;
@ -418,17 +417,17 @@ bool JitBlock::ContainsAddress(u32 em_address)
if (address & JIT_ICACHE_VMEM_BIT) if (address & JIT_ICACHE_VMEM_BIT)
{ {
u32 cacheaddr = address & JIT_ICACHE_MASK; u32 cacheaddr = address & JIT_ICACHE_MASK;
memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32); memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, length);
} }
else if (address & JIT_ICACHE_EXRAM_BIT) else if (address & JIT_ICACHE_EXRAM_BIT)
{ {
u32 cacheaddr = address & JIT_ICACHEEX_MASK; u32 cacheaddr = address & JIT_ICACHEEX_MASK;
memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32); memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, length);
} }
else else
{ {
u32 cacheaddr = address & JIT_ICACHE_MASK; u32 cacheaddr = address & JIT_ICACHE_MASK;
memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32); memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, length);
} }
#endif #endif
} }

View File

@ -129,7 +129,7 @@ public:
CompiledCode GetCompiledCodeFromBlock(int block_num); CompiledCode GetCompiledCodeFromBlock(int block_num);
// DOES NOT WORK CORRECTLY WITH INLINING // DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateICache(u32 em_address); void InvalidateICache(u32 address, const u32 length);
void DestroyBlock(int block_num, bool invalidate); void DestroyBlock(int block_num, bool invalidate);
// Not currently used // Not currently used

View File

@ -110,7 +110,7 @@ namespace PowerPC
#endif #endif
valid[set] = 0; valid[set] = 0;
if (jit) if (jit)
jit->GetBlockCache()->InvalidateICache(addr); jit->GetBlockCache()->InvalidateICache(addr & ~0x1f, 32);
} }
u32 InstructionCache::ReadInstruction(u32 addr) u32 InstructionCache::ReadInstruction(u32 addr)

View File

@ -60,7 +60,6 @@ volatile bool interruptSet= false;
volatile bool interruptWaiting= false; volatile bool interruptWaiting= false;
volatile bool interruptTokenWaiting = false; volatile bool interruptTokenWaiting = false;
volatile bool interruptFinishWaiting = false; volatile bool interruptFinishWaiting = false;
volatile bool OnOverflow = false;
bool IsOnThread() bool IsOnThread()
{ {
@ -92,7 +91,6 @@ void DoState(PointerWrap &p)
p.Do(interruptWaiting); p.Do(interruptWaiting);
p.Do(interruptTokenWaiting); p.Do(interruptTokenWaiting);
p.Do(interruptFinishWaiting); p.Do(interruptFinishWaiting);
p.Do(OnOverflow);
} }
inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);} inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);}
@ -135,7 +133,6 @@ void Init()
bProcessFifoToLoWatermark = false; bProcessFifoToLoWatermark = false;
bProcessFifoAllDistance = false; bProcessFifoAllDistance = false;
isPossibleWaitingSetDrawDone = false; isPossibleWaitingSetDrawDone = false;
OnOverflow = false;
et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper); et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper);
} }
@ -449,26 +446,7 @@ void STACKALIGN GatherPipeBursted()
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE); Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
if (!IsOnThread()) if (!IsOnThread())
{
RunGpu(); RunGpu();
}
else
{
if(fifo.CPReadWriteDistance == fifo.CPEnd - fifo.CPBase - 32)
{
if(!OnOverflow)
NOTICE_LOG(COMMANDPROCESSOR,"FIFO is almost in overflown, BreakPoint: %i", fifo.bFF_Breakpoint);
OnOverflow = true;
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable &&
fifo.CPReadWriteDistance > fifo.CPEnd - fifo.CPBase - 64)
Common::YieldCPU();
}
else
{
OnOverflow = false;
}
}
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase, _assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
"FIFO is overflown by GatherPipe !\nCPU thread is too fast!"); "FIFO is overflown by GatherPipe !\nCPU thread is too fast!");

View File

@ -35,7 +35,6 @@ extern volatile bool interruptSet;
extern volatile bool interruptWaiting; extern volatile bool interruptWaiting;
extern volatile bool interruptTokenWaiting; extern volatile bool interruptTokenWaiting;
extern volatile bool interruptFinishWaiting; extern volatile bool interruptFinishWaiting;
extern volatile bool OnOverflow;
// internal hardware addresses // internal hardware addresses
enum enum

View File

@ -137,8 +137,7 @@ void RunGpuLoop()
CommandProcessor::SetCpStatus(); CommandProcessor::SetCpStatus();
// check if we are able to run this buffer // check if we are able to run this buffer
while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
fifo.CPReadWriteDistance && (!AtBreakpoint() || CommandProcessor::OnOverflow))
{ {
if (!GpuRunningState) break; if (!GpuRunningState) break;