add proper support for GXFIFO stalls.

bad games that blast the GXFIFO and overflow it:
* Super Mario 64 DS
* Rayman RR2

The latter seems to get its music streaming messed up.
This commit is contained in:
StapleButter
2018-11-23 22:21:41 +01:00
parent 27e1ca4103
commit a9e7f8bc5b
8 changed files with 140 additions and 34 deletions

View File

@ -217,6 +217,7 @@ void ARM::JumpTo(u32 addr, bool restorecpsr)
// aging cart debug crap // aging cart debug crap
//if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]); //if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]);
//if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]); //if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]);
// R0=DMA# R1=src R2=size
u32 oldregion = R[15] >> 23; u32 oldregion = R[15] >> 23;
u32 newregion = addr >> 23; u32 newregion = addr >> 23;

View File

@ -242,12 +242,14 @@ s32 DMA::Run(s32 cycles)
if (!Running) if (!Running)
return cycles; return cycles;
Executing = true;
if (!(Cnt & 0x04000000)) if (!(Cnt & 0x04000000))
{ {
u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;
void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16;
while (IterCount > 0 && cycles > 0) while (IterCount > 0 && cycles > 0 && !Stall)
{ {
writefn(CurDstAddr, readfn(CurSrcAddr)); writefn(CurDstAddr, readfn(CurSrcAddr));
@ -264,7 +266,8 @@ s32 DMA::Run(s32 cycles)
else else
{ {
// optimized path for typical GXFIFO DMA // optimized path for typical GXFIFO DMA
if (IsGXFIFODMA) // likely not worth it tbh
/*if (IsGXFIFODMA)
{ {
while (IterCount > 0 && cycles > 0) while (IterCount > 0 && cycles > 0)
{ {
@ -278,12 +281,12 @@ s32 DMA::Run(s32 cycles)
IterCount--; IterCount--;
RemCount--; RemCount--;
} }
} }*/
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;
while (IterCount > 0 && cycles > 0) while (IterCount > 0 && cycles > 0 && !Stall)
{ {
writefn(CurDstAddr, readfn(CurSrcAddr)); writefn(CurDstAddr, readfn(CurSrcAddr));
@ -298,6 +301,9 @@ s32 DMA::Run(s32 cycles)
} }
} }
Executing = false;
Stall = false;
if (RemCount) if (RemCount)
{ {
if (IterCount == 0) if (IterCount == 0)

View File

@ -53,6 +53,11 @@ public:
Cnt &= ~0x80000000; Cnt &= ~0x80000000;
} }
// Flags this DMA channel as stalled, but only while Run() is actively
// executing a transfer (Executing is set). Run()'s transfer loops check
// the Stall flag and stop iterating once it is raised.
void StallIfRunning()
{
    if (!Executing) return;

    Stall = true;
}
u32 SrcAddr; u32 SrcAddr;
u32 DstAddr; u32 DstAddr;
u32 Cnt; u32 Cnt;
@ -74,6 +79,9 @@ private:
bool Running; bool Running;
bool InProgress; bool InProgress;
bool Executing;
bool Stall;
bool IsGXFIFODMA; bool IsGXFIFODMA;
}; };

View File

@ -73,6 +73,13 @@
// TODO: check how DISP_1DOT_DEPTH works and whether it's latched // TODO: check how DISP_1DOT_DEPTH works and whether it's latched
// command execution notes
//
// timings given by GBAtek are for individual commands
// real-life timings are different depending on how commands are combined
// the engine is able to do parallel execution to some extent
namespace GPU3D namespace GPU3D
{ {
@ -116,38 +123,38 @@ const u32 CmdNumParams[256] =
const s32 CmdNumCycles[256] = const s32 CmdNumCycles[256] =
{ {
// 0x00 // 0x00
0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x10 // 0x10
1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22, 1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22,
0, 0, 0, 1, 1, 1,
// 0x20 // 0x20
1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1, 1, 9, 1, 9, 9, 9, 9, 9, 9, 1, 1, 1,
0, 0, 0, 0, 1, 1, 1, 1,
// 0x30 // 0x30
4, 4, 6, 1, 32, 4, 4, 6, 1, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x40 // 0x40
1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x50 // 0x50
392, 392,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x60 // 0x60
1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x70 // 0x70
103, 9, 5, 103, 9, 5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x80+ // 0x80+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
}; };
typedef union typedef union
@ -164,6 +171,8 @@ typedef union
FIFO<CmdFIFOEntry>* CmdFIFO; FIFO<CmdFIFOEntry>* CmdFIFO;
FIFO<CmdFIFOEntry>* CmdPIPE; FIFO<CmdFIFOEntry>* CmdPIPE;
FIFO<CmdFIFOEntry>* CmdStallQueue;
u32 NumCommands, CurCommand, ParamCount, TotalParams; u32 NumCommands, CurCommand, ParamCount, TotalParams;
u32 DispCnt; u32 DispCnt;
@ -276,6 +285,8 @@ bool Init()
CmdFIFO = new FIFO<CmdFIFOEntry>(256); CmdFIFO = new FIFO<CmdFIFOEntry>(256);
CmdPIPE = new FIFO<CmdFIFOEntry>(4); CmdPIPE = new FIFO<CmdFIFOEntry>(4);
CmdStallQueue = new FIFO<CmdFIFOEntry>(64);
if (!SoftRenderer::Init()) return false; if (!SoftRenderer::Init()) return false;
return true; return true;
@ -287,6 +298,8 @@ void DeInit()
delete CmdFIFO; delete CmdFIFO;
delete CmdPIPE; delete CmdPIPE;
delete CmdStallQueue;
} }
void Reset() void Reset()
@ -294,6 +307,8 @@ void Reset()
CmdFIFO->Clear(); CmdFIFO->Clear();
CmdPIPE->Clear(); CmdPIPE->Clear();
CmdStallQueue->Clear();
NumCommands = 0; NumCommands = 0;
CurCommand = 0; CurCommand = 0;
ParamCount = 0; ParamCount = 0;
@ -514,6 +529,20 @@ void DoSavestate(Savestate* file)
// probably not worth storing the vblank-latched Renderxxxxxx variables // probably not worth storing the vblank-latched Renderxxxxxx variables
if (file->Saving ||
file->VersionMajor > 2 ||
(file->VersionMajor == 2 && file->VersionMinor >= 1))
{
// command stall queue, only in version 2.1 and up
CmdStallQueue->DoSavestate(file);
}
else
{
// for version 2.0, just clear it. not having it doesn't matter
// if this comes from older melonDS revisions.
CmdStallQueue->Clear();
}
if (!file->Saving) if (!file->Saving)
{ {
ClipMatrixDirty = true; ClipMatrixDirty = true;
@ -1387,17 +1416,13 @@ void CmdFIFOWrite(CmdFIFOEntry& entry)
{ {
if (CmdFIFO->IsFull()) if (CmdFIFO->IsFull())
{ {
//printf("!!! GX FIFO FULL\n"); // store it to the stall queue. stall the system.
//return; // worst case is if a STMxx opcode causes this, which is why our stall queue
// has 64 entries. this is less complicated than trying to make STMxx stall-able.
// temp. hack CmdStallQueue->Write(entry);
// SM64DS seems to overflow the FIFO occasionally NDS::GXFIFOStall();
// either leftover bugs in our implementation, or the game accidentally doing that return;
// TODO: investigate.
// TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore)
while (CmdFIFO->IsFull())
ExecuteCommand();
} }
CmdFIFO->Write(entry); CmdFIFO->Write(entry);
@ -1426,6 +1451,21 @@ CmdFIFOEntry CmdFIFORead()
if (!CmdFIFO->IsEmpty()) if (!CmdFIFO->IsEmpty())
CmdPIPE->Write(CmdFIFO->Read()); CmdPIPE->Write(CmdFIFO->Read());
// empty stall queue if needed
// CmdFIFO should not be full at this point.
if (!CmdStallQueue->IsEmpty())
{
while (!CmdStallQueue->IsEmpty())
{
if (CmdFIFO->IsFull()) break;
CmdFIFOEntry entry = CmdStallQueue->Read();
CmdFIFOWrite(entry);
}
if (CmdStallQueue->IsEmpty())
NDS::GXFIFOUnstall();
}
CheckFIFODMA(); CheckFIFODMA();
CheckFIFOIRQ(); CheckFIFOIRQ();
} }
@ -1450,6 +1490,7 @@ void ExecuteCommand()
for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]); for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]);
printf("\n");*/ printf("\n");*/
CycleCount += CmdNumCycles[entry.Command]; CycleCount += CmdNumCycles[entry.Command];
ExecParamCount = 0; ExecParamCount = 0;
if (CycleCount > 0) if (CycleCount > 0)
@ -1852,6 +1893,8 @@ void ExecuteCommand()
break; break;
case 0x40: // begin polygons case 0x40: // begin polygons
// TODO: check if there was a polygon being defined but incomplete
// such cases seem to freeze the GPU
PolygonMode = ExecParams[0] & 0x3; PolygonMode = ExecParams[0] & 0x3;
VertexNum = 0; VertexNum = 0;
VertexNumInPoly = 0; VertexNumInPoly = 0;
@ -1902,6 +1945,12 @@ void ExecuteCommand()
} }
} }
// Reports the current CycleCount, clamped so that a negative count
// is returned as zero.
s32 CyclesToRunFor()
{
    return (CycleCount < 0) ? 0 : CycleCount;
}
void Run(s32 cycles) void Run(s32 cycles)
{ {
if (FlushRequest) if (FlushRequest)
@ -1924,6 +1973,8 @@ void Run(s32 cycles)
if (CycleCount <= 0 && CmdPIPE->IsEmpty()) if (CycleCount <= 0 && CmdPIPE->IsEmpty())
{ {
// todo: advance remaining pipeline shit here
CycleCount = 0; CycleCount = 0;
GXStat &= ~(1<<27); GXStat &= ~(1<<27);

View File

@ -90,6 +90,7 @@ void DoSavestate(Savestate* file);
void ExecuteCommand(); void ExecuteCommand();
s32 CyclesToRunFor();
void Run(s32 cycles); void Run(s32 cycles);
void CheckFIFOIRQ(); void CheckFIFOIRQ();
void CheckFIFODMA(); void CheckFIFODMA();

View File

@ -108,6 +108,7 @@ bool Running;
void DivDone(u32 param); void DivDone(u32 param);
void SqrtDone(u32 param); void SqrtDone(u32 param);
void RunTimer(u32 tid, s32 cycles);
bool Init() bool Init()
@ -608,12 +609,27 @@ u32 RunFrame()
s32 ndscyclestorun; s32 ndscyclestorun;
// TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1 // TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1
// TODO: we need to directly change CurIterationCycles when rescheduling shit
CalcIterationCycles(); CalcIterationCycles();
if (CPUStop & 0x80000000) if (CPUStop & 0x80000000)
{ {
// GXFIFO stall // GXFIFO stall
// we just run the GPU and the timers.
// the rest of the hardware is driven by the event scheduler.
s32 cycles = GPU3D::CyclesToRunFor();
GPU3D::Run(cycles);
u32 timermask = TimerCheckMask[0];
if (timermask & 0x1) RunTimer(0, cycles);
if (timermask & 0x2) RunTimer(1, cycles);
if (timermask & 0x4) RunTimer(2, cycles);
if (timermask & 0x8) RunTimer(3, cycles);
timermask = TimerCheckMask[1];
if (timermask & 0x1) RunTimer(4, cycles);
if (timermask & 0x2) RunTimer(5, cycles);
if (timermask & 0x4) RunTimer(6, cycles);
if (timermask & 0x8) RunTimer(7, cycles);
} }
else else
{ {
@ -818,6 +834,27 @@ void ResumeCPU(u32 cpu, u32 mask)
CPUStop &= ~mask; CPUStop &= ~mask;
} }
// Enters the GXFIFO stall state by setting bit 31 of CPUStop.
// Does nothing if that bit is already set.
void GXFIFOStall()
{
    // already stalled: nothing more to do
    if (CPUStop & 0x80000000) return;

    CPUStop |= 0x80000000;

    // NOTE(review): CurCPU == 1 presumably means the ARM9 is the one
    // currently executing (and thus the one that hit the full FIFO) -- confirm
    if (CurCPU == 1)
    {
        ARM9->Halt(2);
        return;
    }

    // otherwise, ask each of the first four DMA channels to stall
    // if it is in the middle of a transfer
    for (int i = 0; i < 4; i++)
        DMAs[i]->StallIfRunning();
}
// Leaves the GXFIFO stall state by clearing bit 31 of CPUStop.
// All other CPUStop bits are left as they are.
void GXFIFOUnstall()
{
    CPUStop = CPUStop & ~0x80000000;
}
u32 GetPC(u32 cpu) u32 GetPC(u32 cpu)
{ {
return cpu ? ARM7->R[15] : ARM9->R[15]; return cpu ? ARM7->R[15] : ARM9->R[15];

View File

@ -148,6 +148,8 @@ void ClearIRQ(u32 cpu, u32 irq);
bool HaltInterrupted(u32 cpu); bool HaltInterrupted(u32 cpu);
void StopCPU(u32 cpu, u32 mask); void StopCPU(u32 cpu, u32 mask);
void ResumeCPU(u32 cpu, u32 mask); void ResumeCPU(u32 cpu, u32 mask);
void GXFIFOStall();
void GXFIFOUnstall();
u32 GetPC(u32 cpu); u32 GetPC(u32 cpu);

View File

@ -23,7 +23,7 @@
#include "types.h" #include "types.h"
#define SAVESTATE_MAJOR 2 #define SAVESTATE_MAJOR 2
#define SAVESTATE_MINOR 0 #define SAVESTATE_MINOR 1
class Savestate class Savestate
{ {