approximate rdlines_count; implement underflow flag

This commit is contained in:
Jaklyy 2024-02-25 22:41:33 -05:00
parent 9219a084c4
commit 9ffa04dfbc
4 changed files with 74 additions and 51 deletions

View File

@ -1041,6 +1041,10 @@ void GPU::StartScanline(u32 line) noexcept
if (GPU3D.IsRendererAccelerated())
GPU3D.Blit(*this);
}
else if (VCount == 183)
{
GPU3D.DispCnt |= GPU3D.RDLinesUnderflow << 12;
}
}
NDS.ScheduleEvent(Event_LCD, true, HBLANK_CYCLES, LCD_StartHBlank, line);

View File

@ -2509,7 +2509,6 @@ void GPU3D::VBlank() noexcept
// Asks the currently selected renderer to render a frame, passing it the GPU state.
// NOTE(review): the name suggests this runs when VCount reaches 215 — confirm against the caller.
void GPU3D::VCount215(GPU& gpu) noexcept
{
// Disabled: RDLinesTemp is no longer assigned here.
//RDLinesTemp = 46;
CurrentRenderer->RenderFrame(gpu);
}
@ -2647,7 +2646,7 @@ u16 GPU3D::Read16(u32 addr) noexcept
return DispCnt;
case 0x04000320:
return RDLines; // CHECKME: Can this always be read? Even when the gpu is powered off?
return RDLines; // CHECKME: Can this always be read? Even when the gpu is powered off? also check 8 bit reads
case 0x04000600:
{

View File

@ -246,6 +246,7 @@ public:
bool RenderingEnabled = false;
u32 DispCnt = 0;
bool RDLinesUnderflow = false;
u8 RDLines = 63;
u8 RDLinesTemp = 46;
u8 AlphaRefVal = 0;
@ -371,7 +372,7 @@ public:
//static constexpr int ScanlineIncrement = 2114 * TimingFrac; // 2114 | how much time a scanline pair "gains"
//static constexpr int AbortIncrement = 12 * TimingFrac; // 12 | how much extra to regain after an aborted scanline (total 2126)
// (why does the next pair get more time if the previous scanline is aborted?)
static constexpr int UnderflowFlag = 14 * TimingFrac; // 14 | How many cycles need to be left for the 3ddispcnt rdlines underflow flag to be set
//static constexpr int UnderflowFlag = 2 * TimingFrac; // 14 | How many cycles need to be left for the 3ddispcnt rdlines underflow flag to be set
//static constexpr int FinishScanline = 512 * TimingFrac;
// GPU 3D Rasterization Timings II: For Tracking Timing Behaviors

View File

@ -1856,6 +1856,10 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
//init internal buffer
ClearBuffers(gpu);
// reset scanline trackers
gpu.GPU3D.RDLinesUnderflow = false;
gpu.GPU3D.RDLinesTemp = 63;
u32 slread[192]; // scanline read times
for (int i = 0, time = InitGPU2DTimeout; i < 192; i++, time += ScanlineReadInc) // CHECKME: is this computed at compile time?
{
@ -1872,6 +1876,7 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
u32 timespent;
u32 prevtimespent;
// scanlines are rendered in pairs of two
RenderScanline(gpu, 0, j, &rastertimingeven);
RenderScanline(gpu, 1, j, &rastertimingodd);
@ -1881,7 +1886,6 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
// 12 cycles at the end of a "timeout" are always used, for whatever reason
RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12);
gpu.GPU3D.RDLinesTemp = 46;
// if first pair was not delayed past the first read, then later scanlines cannot either
// this allows us to implement a fast path
//if (slread[0] - timespent + ScanlinePushDelay >= 256)
@ -1892,12 +1896,17 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
RenderScanline(gpu, 2, j, &rastertimingeven);
RenderScanline(gpu, 3, j, &rastertimingodd);
// the time spent on the previous scanline pair is important for emulating the edge marking bug properly
prevtimespent = timespent;
RasterTiming += timespent = std::max(std::initializer_list<s32> {rastertimingeven, rastertimingodd, FinalPassLen});
RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12);
// set the underflow flag if one of the scanlines came within 14 cycles of visible underflow
if (ScanlineTimeout <= RasterTiming) gpu.GPU3D.RDLinesUnderflow = true;
scanlineswaiting++;
// simulate the process of scanlines being read from the 48 scanline buffer
while (RasterTiming >= slread[nextread] + 565)
{
if (RasterTiming < slread[nextread] + 565)
@ -1907,11 +1916,15 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
}
scanlineswaiting--;
nextread++;
// update rdlines_count register
if (gpu.GPU3D.RDLinesTemp > scanlineswaiting) gpu.GPU3D.RDLinesTemp = scanlineswaiting; // TODO: not accurate, rdlines appears to update early in some manner?
}
// final pass pairs are the previous scanline pair offset -1 scanline, thus we start with only building one
ScanlineFinalPass<true>(gpu.GPU3D, 0, true, timespent >= 502);
for (int y = 4; y < 192; y+=2)
{
//update sl timeout
ScanlineTimeout = slread[y-1] - FinalPassLen;
RenderScanline(gpu, y, j, &rastertimingeven);
@ -1921,8 +1934,12 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
RasterTiming += timespent = std::max(std::initializer_list<s32> {rastertimingeven, rastertimingodd, FinalPassLen});
RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12);
// set the underflow flag if one of the scanlines came within 14 cycles of visible underflow
if (ScanlineTimeout <= RasterTiming) gpu.GPU3D.RDLinesUnderflow = true;
scanlineswaiting+=2;
// simulate the process of scanlines being read from the 48 scanline buffer
while (scanlineswaiting >= 47 || RasterTiming >= slread[nextread] + 565)
{
if (RasterTiming < slread[nextread] + 565)
@ -1932,6 +1949,8 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
}
scanlineswaiting--;
nextread++;
// update rdlines_count register
if (gpu.GPU3D.RDLinesTemp > scanlineswaiting) gpu.GPU3D.RDLinesTemp = scanlineswaiting; // TODO: not accurate, rdlines appears to update early in some manner?
}
ScanlineFinalPass<true>(gpu.GPU3D, y-3, prevtimespent >= 502 || y-3 == 1, timespent >= 502);
@ -1940,7 +1959,8 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
scanlineswaiting+= 2;
prevtimespent = timespent;
// do this one last time to allow for edge marking bug emulation.
// emulate read timings one last time, since it shouldn't matter after this
// additionally, don't bother tracking rdlines past this point since it shouldn't be able to decrement anymore (CHECKME)
while (scanlineswaiting >= 47 || RasterTiming >= slread[nextread] + 565)
{
if (RasterTiming < slread[nextread] + 565)
@ -1952,12 +1972,11 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
nextread++;
}
// finish the last 3 scanlines
ScanlineFinalPass<true>(gpu.GPU3D, 189, prevtimespent >= 502, timespent >= 502);
ScanlineFinalPass<true>(gpu.GPU3D, 190, prevtimespent >= 502, true);
// skip timing emulation here since it's irrelevant, also use timespent instead of prev because we're skipping timing emulation
ScanlineFinalPass<true>(gpu.GPU3D, 191, timespent >= 502, true);
}
/*else
{