a few attempts at optimization

This commit is contained in:
StapleButter
2017-04-23 15:25:15 +02:00
parent 60cdc7d6f7
commit 1759672d14
6 changed files with 109 additions and 42 deletions

View File

@ -332,7 +332,7 @@ s32 ARM::Execute()
else if (NDS::HaltInterrupted(Num))
{
Halted = 0;
if (NDS::IME[Num]&1)
if (NDS::IME[Num] & 0x1)
TriggerIRQ();
}
else
@ -403,9 +403,9 @@ s32 ARM::Execute()
Cycles = CyclesToRun;
break;
}
if (NDS::HaltInterrupted(Num))
if (NDS::IF[Num] & NDS::IE[Num])
{
if (NDS::IME[Num]&1)
if (NDS::IME[Num] & 0x1)
TriggerIRQ();
}
}

View File

@ -233,6 +233,23 @@ s32 DMA::Run(s32 cycles)
}
else
{
// optimized path for typical GXFIFO DMA
if (CPU == 0 && (CurSrcAddr>>24) == 0x02 && CurDstAddr == 0x04000400 && DstAddrInc == 0)
{
while (IterCount > 0 && cycles > 0)
{
GPU3D::WriteToGXFIFO(*(u32*)&NDS::MainRAM[CurSrcAddr&0x3FFFFF]);
s32 c = (Waitstates[1][0x2] + Waitstates[1][0x4]);
cycles -= c;
NDS::RunTimingCriticalDevices(0, c);
CurSrcAddr += SrcAddrInc<<2;
IterCount--;
RemCount--;
}
}
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;

View File

@ -1807,6 +1807,45 @@ u32* GetLine(int line)
}
// Feeds one 32-bit word written to the GXFIFO into the command unpacker.
//
// State carried between calls lives outside this function: NumCommands,
// CurCommand, ParamCount and TotalParams.
//  - When NumCommands == 0, val is taken as a new command word: NumCommands is
//    set to 4 and the low byte of CurCommand selects the current command.
//  - Otherwise val is counted as one more parameter of the current command.
// Every entry handed to CmdFIFOWrite carries the current command byte and val.
// NOTE(review): presumably this implements the packed-command format (up to
// four 8-bit command IDs per word, followed by their parameters) -- confirm.
void WriteToGXFIFO(u32 val)
{
if (NumCommands == 0)
{
// idle: val starts a new command word
NumCommands = 4;
CurCommand = val;
ParamCount = 0;
TotalParams = CmdNumParams[CurCommand & 0xFF];
// the first command expects parameters: nothing is enqueued until one arrives
if (TotalParams > 0) return;
}
// a command word is already in progress: val is its next parameter
else
ParamCount++;
// loop so that consecutive commands taking no parameters are all handled
// by this single write
for (;;)
{
// a zero command byte is skipped, except when the whole command word is zero
// and this is its first slot; that case is still enqueued as command 0
if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0))
{
CmdFIFOEntry entry;
entry.Command = CurCommand & 0xFF;
entry.Param = val;
CmdFIFOWrite(entry);
}
// current command has received all of its parameters: move to the next byte
if (ParamCount >= TotalParams)
{
CurCommand >>= 8;
NumCommands--;
// all four slots consumed; the next write starts a new command word
if (NumCommands == 0) break;
ParamCount = 0;
TotalParams = CmdNumParams[CurCommand & 0xFF];
}
// the (possibly new) current command still needs parameters: wait for the
// next write. Otherwise it takes none and is processed right away, reusing val.
if (ParamCount < TotalParams)
break;
}
}
u8 Read8(u32 addr)
{
printf("unknown GPU3D read8 %08X\n", addr);
@ -2012,41 +2051,7 @@ void Write32(u32 addr, u32 val)
if (addr >= 0x04000400 && addr < 0x04000440)
{
if (NumCommands == 0)
{
NumCommands = 4;
CurCommand = val;
ParamCount = 0;
TotalParams = CmdNumParams[CurCommand & 0xFF];
if (TotalParams > 0) return;
}
else
ParamCount++;
for (;;)
{
if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0))
{
CmdFIFOEntry entry;
entry.Command = CurCommand & 0xFF;
entry.Param = val;
CmdFIFOWrite(entry);
}
if (ParamCount >= TotalParams)
{
CurCommand >>= 8;
NumCommands--;
if (NumCommands == 0) break;
ParamCount = 0;
TotalParams = CmdNumParams[CurCommand & 0xFF];
}
if (ParamCount < TotalParams)
break;
}
WriteToGXFIFO(val);
return;
}

View File

@ -85,6 +85,8 @@ void VBlank();
void VCount215();
u32* GetLine(int line);
// Feeds one 32-bit word into the GXFIFO command unpacker. Exposed so that
// callers other than Write32 (e.g. the DMA fast path) can push words directly.
void WriteToGXFIFO(u32 val);
u8 Read8(u32 addr);
u16 Read16(u32 addr);
u32 Read32(u32 addr);

View File

@ -541,7 +541,6 @@ bool DepthTest(s32 oldz, s32 z)
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
{
u32 attr = polygon->Attr;
u8 r, g, b, a;
u32 blendmode = (polygon->Attr >> 4) & 0x3;
@ -910,7 +909,10 @@ void RenderPolygon(Polygon* polygon)
// wireframe polygons. really ugly, but works
if (wireframe && edge==0)
{
x = r_edgestart + 1;
continue;
}
u32 pixeladdr = (y*256) + x;
u32 attr = polygon->Attr & 0x3F008000;