From 9ffa04dfbc1bf187f3876864f224d404a69a3b05 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 25 Feb 2024 22:41:33 -0500 Subject: [PATCH] approximate rdlines_count; implement underflow flag --- src/GPU.cpp | 4 ++ src/GPU3D.cpp | 3 +- src/GPU3D.h | 3 +- src/GPU3D_Soft.cpp | 115 ++++++++++++++++++++++++++------------------- 4 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/GPU.cpp b/src/GPU.cpp index f23e641e..a78deba6 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -1041,6 +1041,10 @@ void GPU::StartScanline(u32 line) noexcept if (GPU3D.IsRendererAccelerated()) GPU3D.Blit(*this); } + else if (VCount == 183) + { + GPU3D.DispCnt |= GPU3D.RDLinesUnderflow << 12; + } } NDS.ScheduleEvent(Event_LCD, true, HBLANK_CYCLES, LCD_StartHBlank, line); diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 8706724b..a9524e88 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -2509,7 +2509,6 @@ void GPU3D::VBlank() noexcept void GPU3D::VCount215(GPU& gpu) noexcept { - //RDLinesTemp = 46; CurrentRenderer->RenderFrame(gpu); } @@ -2647,7 +2646,7 @@ u16 GPU3D::Read16(u32 addr) noexcept return DispCnt; case 0x04000320: - return RDLines; // CHECKME: Can this always be read? Even when the gpu is powered off? + return RDLines; // CHECKME: Can this always be read? Even when the gpu is powered off? also check 8 bit reads case 0x04000600: { diff --git a/src/GPU3D.h b/src/GPU3D.h index 3d3b0e7f..fb779a68 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -246,6 +246,7 @@ public: bool RenderingEnabled = false; u32 DispCnt = 0; + bool RDLinesUnderflow = false; u8 RDLines = 63; u8 RDLinesTemp = 46; u8 AlphaRefVal = 0; @@ -371,7 +372,7 @@ public: //static constexpr int ScanlineIncrement = 2114 * TimingFrac; // 2114 | how much time a scanline pair "gains" //static constexpr int AbortIncrement = 12 * TimingFrac; // 12 | how much extra to regain after an aborted scanline (total 2126) // (why does the next pair get more time if the previous scanline is aborted?) - static constexpr int UnderflowFlag = 14 * TimingFrac; // 14 | How many cycles need to be left for the 3ddispcnt rdlines underflow flag to be set + //static constexpr int UnderflowFlag = 2 * TimingFrac; // 14 | How many cycles need to be left for the 3ddispcnt rdlines underflow flag to be set //static constexpr int FinishScanline = 512 * TimingFrac; // GPU 3D Rasterization Timings II: For Tracking Timing Behaviors diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 50d1104f..0600b435 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -1855,6 +1855,10 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys) //init internal buffer ClearBuffers(gpu); + + // reset scanline trackers + gpu.GPU3D.RDLinesUnderflow = false; + gpu.GPU3D.RDLinesTemp = 63; u32 slread[192]; // scanline read times for (int i = 0, time = InitGPU2DTimeout; i < 192; i++, time += ScanlineReadInc) // CHECKME: is this computed at compile time? @@ -1872,6 +1876,7 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys) u32 timespent; u32 prevtimespent; + // scanlines are rendered in pairs of two RenderScanline(gpu, 0, j, &rastertimingeven); RenderScanline(gpu, 1, j, &rastertimingodd); @@ -1880,8 +1885,7 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys) RasterTiming = timespent = std::max(std::initializer_list {rastertimingeven, rastertimingodd, FinalPassLen}); // 12 cycles at the end of a "timeout" are always used for w/e reason RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12); - - gpu.GPU3D.RDLinesTemp = 46; + // if first pair was not delayed past the first read, then later scanlines cannot either // this allows us to implement a fast path //if (slread[0] - timespent + ScanlinePushDelay >= 256) @@ -1892,13 +1896,72 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys) RenderScanline(gpu, 2, j, &rastertimingeven); RenderScanline(gpu, 3, j, &rastertimingodd); + // the time spent on the previous scanline pair is important for emulating the edge marking bug properly prevtimespent = timespent; RasterTiming += timespent = std::max(std::initializer_list {rastertimingeven, rastertimingodd, FinalPassLen}); RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12); - scanlineswaiting++; + // set the underflow flag if one of the scanlines came within 14 cycles of visible underflow + if (ScanlineTimeout <= RasterTiming) gpu.GPU3D.RDLinesUnderflow = true; + scanlineswaiting++; + + // simulate the process of scanlines being read from the 48 scanline buffer while (RasterTiming >= slread[nextread] + 565) + { + if (RasterTiming < slread[nextread] + 565) + { + RasterTiming += timespent = (slread[nextread] + 565) - RasterTiming; // why + 565? + timespent += 571; // fixes edge marking bug emulation. not sure why this is needed? + } + scanlineswaiting--; + nextread++; + // update rdlines_count register + if (gpu.GPU3D.RDLinesTemp > scanlineswaiting) gpu.GPU3D.RDLinesTemp = scanlineswaiting; // TODO: not accurate, rdlines appears to update early in some manner? + } + + // final pass pairs are the previous scanline pair offset -1 scanline, thus we start with only building one + ScanlineFinalPass(gpu.GPU3D, 0, true, timespent >= 502); + for (int y = 4; y < 192; y+=2) + { + //update sl timeout + ScanlineTimeout = slread[y-1] - FinalPassLen; + + RenderScanline(gpu, y, j, &rastertimingeven); + RenderScanline(gpu, y+1, j, &rastertimingodd); + + prevtimespent = timespent; + RasterTiming += timespent = std::max(std::initializer_list {rastertimingeven, rastertimingodd, FinalPassLen}); + RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12); + + // set the underflow flag if one of the scanlines came within 14 cycles of visible underflow + if (ScanlineTimeout <= RasterTiming) gpu.GPU3D.RDLinesUnderflow = true; + + scanlineswaiting+=2; + + // simulate the process of scanlines being read from the 48 scanline buffer + while (scanlineswaiting >= 47 || RasterTiming >= slread[nextread] + 565) + { + if (RasterTiming < slread[nextread] + 565) + { + RasterTiming += timespent = (slread[nextread] + 565) - RasterTiming; // why + 565? + timespent += 571; // fixes edge marking bug emulation. not sure why this is needed? + } + scanlineswaiting--; + nextread++; + // update rdlines_count register + if (gpu.GPU3D.RDLinesTemp > scanlineswaiting) gpu.GPU3D.RDLinesTemp = scanlineswaiting; // TODO: not accurate, rdlines appears to update early in some manner? + } + + ScanlineFinalPass(gpu.GPU3D, y-3, prevtimespent >= 502 || y-3 == 1, timespent >= 502); + ScanlineFinalPass(gpu.GPU3D, y-2, prevtimespent >= 502, timespent >= 502); + } + scanlineswaiting+= 2; + prevtimespent = timespent; + + // emulate read timings one last time, since it shouldn't matter after this + // additionally dont bother tracking rdlines anymore since it shouldn't be able to decrement anymore (CHECKME) + while (scanlineswaiting >= 47 || RasterTiming >= slread[nextread] + 565) { if (RasterTiming < slread[nextread] + 565) { @@ -1909,55 +1972,11 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys) nextread++; } - ScanlineFinalPass(gpu.GPU3D, 0, true, timespent >= 502); - for (int y = 4; y < 192; y+=2) - { - ScanlineTimeout = slread[y-1] - FinalPassLen; - - RenderScanline(gpu, y, j, &rastertimingeven); - RenderScanline(gpu, y+1, j, &rastertimingodd); - - prevtimespent = timespent; - RasterTiming += timespent = std::max(std::initializer_list {rastertimingeven, rastertimingodd, FinalPassLen}); - RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12); - - scanlineswaiting+=2; - - while (scanlineswaiting >= 47 || RasterTiming >= slread[nextread] + 565) - { - if (RasterTiming < slread[nextread] + 565) - { - RasterTiming += timespent = (slread[nextread] + 565) - RasterTiming; // why + 565? - timespent += 571; // fixes edge marking bug emulation. not sure why this is needed? - } - scanlineswaiting--; - nextread++; - } - - ScanlineFinalPass(gpu.GPU3D, y-3, prevtimespent >= 502 || y-3 == 1, timespent >= 502); - ScanlineFinalPass(gpu.GPU3D, y-2, prevtimespent >= 502, timespent >= 502); - } - scanlineswaiting+= 2; - prevtimespent = timespent; - - // do this one last time to allow for edge marking bug emulation. - while (scanlineswaiting >= 47 || RasterTiming >= slread[nextread] + 565) - { - if (RasterTiming < slread[nextread] + 565) - { - RasterTiming += timespent = (slread[nextread] + 565) - RasterTiming; // why + 565? - timespent += 571; // fixes edge marking bug emulation. not sure why this is needed? - } - scanlineswaiting--; - nextread++; - } - + // finish the last 3 scanlines ScanlineFinalPass(gpu.GPU3D, 189, prevtimespent >= 502, timespent >= 502); ScanlineFinalPass(gpu.GPU3D, 190, prevtimespent >= 502, true); - // skip timing emulation here since it's irrelevant, also use timespent instead of prev because we're skipping timing emulation ScanlineFinalPass(gpu.GPU3D, 191, timespent >= 502, true); - } /*else {