a few attempts at optimization

This commit is contained in:
StapleButter 2017-04-23 15:25:15 +02:00
parent 60cdc7d6f7
commit 1759672d14
6 changed files with 109 additions and 42 deletions

View File

@ -51,12 +51,12 @@
<Option type="1" /> <Option type="1" />
<Option compiler="gcc" /> <Option compiler="gcc" />
<Compiler> <Compiler>
<Add option="-O2" /> <Add option="-O3" />
<Add option="-m64" /> <Add option="-m64" />
<Add option="-D_FILE_OFFSET_BITS=64" />
<Add option="-D__WXMSW__" />
<Add option="-I$(TARGET_COMPILER_DIR)/lib/wx/include/msw-unicode-static-3.0" /> <Add option="-I$(TARGET_COMPILER_DIR)/lib/wx/include/msw-unicode-static-3.0" />
<Add option="-I$(TARGET_COMPILER_DIR)/include/wx-3.0" /> <Add option="-I$(TARGET_COMPILER_DIR)/include/wx-3.0" />
<Add option="-D_FILE_OFFSET_BITS=64" />
<Add option="-D__WXMSW__" />
</Compiler> </Compiler>
<Linker> <Linker>
<Add option="-s" /> <Add option="-s" />
@ -116,6 +116,45 @@
<Add option="-m64" /> <Add option="-m64" />
</Linker> </Linker>
</Target> </Target>
<Target title="Profile Windows">
<!-- Profiling build target: compiled with -O2 plus -g, and linked without a -s option. -->
<!-- NOTE(review): the output and object directories below are named "Release"; confirm they are not shared with the Release target, otherwise objects built with different flags would be mixed. -->
<Option platforms="Windows;" />
<Option output="bin/Release/melonDS" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Release/" />
<Option type="1" />
<Option compiler="gcc" />
<Compiler>
<Add option="-O2" />
<Add option="-m64" />
<Add option="-g" />
<Add option="-I$(TARGET_COMPILER_DIR)/lib/wx/include/msw-unicode-static-3.0" />
<Add option="-I$(TARGET_COMPILER_DIR)/include/wx-3.0" />
<Add option="-D_FILE_OFFSET_BITS=64" />
<Add option="-D__WXMSW__" />
</Compiler>
<Linker>
<Add option="-m64" />
<!-- Libraries prefixed with ":" are given by exact archive file name. -->
<Add library=":libwx_mswu_core-3.0.a" />
<Add library=":libwx_baseu-3.0.a" />
<Add library="SDL2" />
<Add library=":libpng.a" />
<Add library=":libjpeg.a" />
<Add library=":libtiff.a" />
<Add library=":libz.a" />
<Add library="rpcrt4" />
<Add library="oleaut32" />
<Add library="ole32" />
<Add library="uuid" />
<Add library="winspool" />
<Add library="winmm" />
<Add library="shell32" />
<Add library="comctl32" />
<Add library="comdlg32" />
<Add library="advapi32" />
<Add library="wsock32" />
<Add library="oleacc" />
<Add library="gdi32" />
</Linker>
</Target>
</Build> </Build>
<Compiler> <Compiler>
<Add option="-Wall" /> <Add option="-Wall" />
@ -126,6 +165,7 @@
<Option compilerVar="WINDRES" /> <Option compilerVar="WINDRES" />
<Option target="Debug Windows" /> <Option target="Debug Windows" />
<Option target="Release Windows" /> <Option target="Release Windows" />
<Option target="Profile Windows" />
</Unit> </Unit>
<Unit filename="src/ARM.cpp" /> <Unit filename="src/ARM.cpp" />
<Unit filename="src/ARM.h" /> <Unit filename="src/ARM.h" />
@ -176,6 +216,7 @@
<Unit filename="xp.manifest"> <Unit filename="xp.manifest">
<Option target="Debug Windows" /> <Option target="Debug Windows" />
<Option target="Release Windows" /> <Option target="Release Windows" />
<Option target="Profile Windows" />
</Unit> </Unit>
<Extensions> <Extensions>
<code_completion /> <code_completion />

View File

@ -332,7 +332,7 @@ s32 ARM::Execute()
else if (NDS::HaltInterrupted(Num)) else if (NDS::HaltInterrupted(Num))
{ {
Halted = 0; Halted = 0;
if (NDS::IME[Num]&1) if (NDS::IME[Num] & 0x1)
TriggerIRQ(); TriggerIRQ();
} }
else else
@ -403,9 +403,9 @@ s32 ARM::Execute()
Cycles = CyclesToRun; Cycles = CyclesToRun;
break; break;
} }
if (NDS::HaltInterrupted(Num)) if (NDS::IF[Num] & NDS::IE[Num])
{ {
if (NDS::IME[Num]&1) if (NDS::IME[Num] & 0x1)
TriggerIRQ(); TriggerIRQ();
} }
} }

View File

@ -233,6 +233,23 @@ s32 DMA::Run(s32 cycles)
} }
else else
{ {
// optimized path for typical GXFIFO DMA
if (CPU == 0 && (CurSrcAddr>>24) == 0x02 && CurDstAddr == 0x04000400 && DstAddrInc == 0)
{
while (IterCount > 0 && cycles > 0)
{
GPU3D::WriteToGXFIFO(*(u32*)&NDS::MainRAM[CurSrcAddr&0x3FFFFF]);
s32 c = (Waitstates[1][0x2] + Waitstates[1][0x4]);
cycles -= c;
NDS::RunTimingCriticalDevices(0, c);
CurSrcAddr += SrcAddrInc<<2;
IterCount--;
RemCount--;
}
}
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;

View File

@ -1807,6 +1807,45 @@ u32* GetLine(int line)
} }
// Feeds one 32-bit word into the packed GXFIFO command decoder.
//
// Decoder state is kept in NumCommands / CurCommand / ParamCount / TotalParams:
//  - when NumCommands is 0, 'val' is latched as a new command word holding
//    four 8-bit command slots, consumed low byte first;
//  - otherwise 'val' is counted as one more parameter for the current slot.
// Each slot's expected parameter count comes from CmdNumParams. Entries are
// handed to CmdFIFOWrite with 'val' as their parameter.
void WriteToGXFIFO(u32 val)
{
    if (NumCommands != 0)
    {
        // already inside a command word: this write is another parameter
        ParamCount++;
    }
    else
    {
        // start of a new command word
        NumCommands = 4;
        CurCommand = val;
        ParamCount = 0;
        TotalParams = CmdNumParams[CurCommand & 0xFF];

        // the first slot expects parameters: nothing to submit until they arrive
        if (TotalParams > 0) return;
    }

    while (true)
    {
        // a zero slot is skipped, unless the whole word is zero and this is
        // still its first slot
        const bool slotHasCommand = (CurCommand & 0xFF) != 0;
        const bool zeroWordFirstSlot = (NumCommands == 4) && (CurCommand == 0);

        if (slotHasCommand || zeroWordFirstSlot)
        {
            CmdFIFOEntry fifoEntry;
            fifoEntry.Command = CurCommand & 0xFF;
            fifoEntry.Param = val;
            CmdFIFOWrite(fifoEntry);
        }

        if (ParamCount >= TotalParams)
        {
            // current slot is complete: advance to the next one
            CurCommand >>= 8;
            NumCommands--;
            if (NumCommands == 0) return;

            ParamCount = 0;
            TotalParams = CmdNumParams[CurCommand & 0xFF];
        }

        // the (possibly new) current slot still needs parameters: wait for
        // the next write; otherwise loop to process a parameterless slot
        if (ParamCount < TotalParams) return;
    }
}
u8 Read8(u32 addr) u8 Read8(u32 addr)
{ {
printf("unknown GPU3D read8 %08X\n", addr); printf("unknown GPU3D read8 %08X\n", addr);
@ -2012,41 +2051,7 @@ void Write32(u32 addr, u32 val)
if (addr >= 0x04000400 && addr < 0x04000440) if (addr >= 0x04000400 && addr < 0x04000440)
{ {
if (NumCommands == 0) WriteToGXFIFO(val);
{
NumCommands = 4;
CurCommand = val;
ParamCount = 0;
TotalParams = CmdNumParams[CurCommand & 0xFF];
if (TotalParams > 0) return;
}
else
ParamCount++;
for (;;)
{
if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0))
{
CmdFIFOEntry entry;
entry.Command = CurCommand & 0xFF;
entry.Param = val;
CmdFIFOWrite(entry);
}
if (ParamCount >= TotalParams)
{
CurCommand >>= 8;
NumCommands--;
if (NumCommands == 0) break;
ParamCount = 0;
TotalParams = CmdNumParams[CurCommand & 0xFF];
}
if (ParamCount < TotalParams)
break;
}
return; return;
} }

View File

@ -85,6 +85,8 @@ void VBlank();
void VCount215(); void VCount215();
u32* GetLine(int line); u32* GetLine(int line);
void WriteToGXFIFO(u32 val);
u8 Read8(u32 addr); u8 Read8(u32 addr);
u16 Read16(u32 addr); u16 Read16(u32 addr);
u32 Read32(u32 addr); u32 Read32(u32 addr);

View File

@ -541,7 +541,6 @@ bool DepthTest(s32 oldz, s32 z)
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
{ {
u32 attr = polygon->Attr;
u8 r, g, b, a; u8 r, g, b, a;
u32 blendmode = (polygon->Attr >> 4) & 0x3; u32 blendmode = (polygon->Attr >> 4) & 0x3;
@ -910,7 +909,10 @@ void RenderPolygon(Polygon* polygon)
// wireframe polygons. really ugly, but works // wireframe polygons. really ugly, but works
if (wireframe && edge==0) if (wireframe && edge==0)
{
x = r_edgestart + 1;
continue; continue;
}
u32 pixeladdr = (y*256) + x; u32 pixeladdr = (y*256) + x;
u32 attr = polygon->Attr & 0x3F008000; u32 attr = polygon->Attr & 0x3F008000;