From a46316d71f187717ad8f45fcc3c42a6ed3a70c6d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:18:39 -0500 Subject: [PATCH] improved timings for the first 50 scanlines --- src/GPU3D.cpp | 9 ++--- src/GPU3D.h | 21 +++++++---- src/GPU3D_Soft.cpp | 93 ++++++++++++++++++++++++++++++---------------- src/GPU3D_Soft.h | 3 +- 4 files changed, 79 insertions(+), 47 deletions(-) diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 12da23db..6fb24979 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -222,8 +222,7 @@ void GPU3D::Reset() noexcept AlphaRefVal = 0; AlphaRef = 0; - RDLines = 46; - RDLinesMin = 46; + RDLinesDisplay = 46; memset(ToonTable, 0, sizeof(ToonTable)); memset(EdgeTable, 0, sizeof(EdgeTable)); @@ -2370,7 +2369,7 @@ void GPU3D::CheckFIFODMA() noexcept void GPU3D::VCount144() noexcept { - RDLinesMin = 46; + RDLinesDisplay = 46; CurrentRenderer->VCount144(); } @@ -2614,7 +2613,7 @@ u16 GPU3D::Read16(u32 addr) noexcept return DispCnt; case 0x04000320: - return RDLines; // IT IS TIME + return RDLinesDisplay; // IT IS TIME case 0x04000600: { @@ -2658,7 +2657,7 @@ u32 GPU3D::Read32(u32 addr) noexcept return DispCnt; case 0x04000320: - return RDLines; // IT IS TIME + return RDLinesDisplay; // IT IS TIME case 0x04000600: { diff --git a/src/GPU3D.h b/src/GPU3D.h index e3e4cc09..57553782 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -245,8 +245,7 @@ public: bool RenderingEnabled = false; u32 DispCnt = 0; - u32 RDLines = 0; - u32 RDLinesMin = 0; + u32 RDLinesDisplay = 0; u8 AlphaRefVal = 0; u8 AlphaRef = 0; @@ -334,13 +333,21 @@ public: // rasteriztion timing constants static constexpr int TimingFrac = 1; // add a fractional component if pixels is not enough precision + // GPU 2D read timings, for emulating race conditions static constexpr int GPU2DSpeedWithinPair = 296 * TimingFrac; - static constexpr int GPU2DSpeedOutsidePair = 948 * TimingFrac; - static constexpr int ScanlinePairLength = 2130 * TimingFrac; - static constexpr int ScanlineTimeout = 2126 * TimingFrac; + static constexpr int GPU2DSpeedOutsidePair = 810 * TimingFrac; + static constexpr int GPU2DSpeedReadScanline = 256 * TimingFrac; static constexpr int InitGPU2DTimeout = 51618 * TimingFrac; - static constexpr int ScanlineBreak = 4 * TimingFrac; + // GPU 3D rasterization timings, for emulating the timeout + static constexpr int ScanlinePairLength = 2130 * TimingFrac; + static constexpr int ScanlineTimeout = 1686 * TimingFrac; // 2126? 1686? + static constexpr int ScanlineBreak = 4 * TimingFrac; + static constexpr int ScanlineBreak2 = 40 * TimingFrac; + static constexpr int IncrementStrange = 1618 * TimingFrac; // 1882? 1442? 1618?? + static constexpr int FreeTiming = 440 * TimingFrac; + + // GPU 3D rasterization timings II, for counting each element with timing characteristics static constexpr int PerPolyTiming = 12 * TimingFrac; // should be correct for *most* line polygons and polygons with vertical slopes static constexpr int PerPixelTiming = 1 * TimingFrac; // does not apply to the first 4 pixels in a polygon (per scanline?) @@ -348,7 +355,7 @@ public: static constexpr int PerScanlineTiming = 1064 * TimingFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED static constexpr int PerScanlineRecup = 2112 * TimingFrac; // seems to check out? // should be the "free" time the gpu has to do the calculation static constexpr int PerRightSlope = 1 * TimingFrac; - static constexpr int EmptyPolyScanline = 4 * TimingFrac - 14; // seems to be slightly under 4? + static constexpr int EmptyPolyScanline = 4 * TimingFrac;// - 14; // seems to be slightly under 4? //static constexpr int FirstPixelTiming; class Renderer3D diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 47bfa3d1..7cb8002a 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -121,44 +121,43 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd) else counter = &RasterTimingEven; *counter += cycles; - if (RasterTiming - *counter) return false; + if (RasterTiming - *counter > 0) return false; GPU.GPU3D.DispCnt |= (1<<12); return true; } -u32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd) +u32 SoftRenderer::DoTimingsPixels(s32 pixels, bool odd) { // calculate and return the difference between the old span and the new span, while adding timings to the timings counter // pixels dont count towards timings if they're the first 4 pixels in a scanline (for some reason?) if (pixels <= 4) return 0; - u32 pixelsremain = pixels-4; + pixels -= 4; s32* counter; if (odd) counter = &RasterTimingOdd; else counter = &RasterTimingEven; - //todo: do this without a for loop somehow. - for (; pixelsremain > 0; pixelsremain--) + //todo: figure out a faster way to support TimingFrac > 1 without using a for loop somehow. + if constexpr (TimingFrac > 1) + for (; pixels > 0; pixels--) + { + *counter += TimingFrac; + if ((RasterTiming - *counter) <= 0) break; + } + else { - *counter += TimingFrac; - if (!(RasterTiming - *counter)) break; + *counter += pixels; + pixels = -(RasterTiming - *counter); + if (pixels > 0) *counter -= pixels; } - if (pixelsremain <= 0) return 0; + if (pixels <= 0) return 0; GPU.GPU3D.DispCnt |= (1<<12); - return pixelsremain; -} - -void SoftRenderer::EndScanline(bool odd) -{ - if (!odd) - { - RasterTiming += std::max(RasterTimingOdd, RasterTimingEven); - } + return pixels; } void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) @@ -1458,7 +1457,6 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd) } } - EndScanline(odd); return abort; } @@ -1758,30 +1756,59 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys) s32 y = 0; s8 prevbufferline = -2; - u8 buffersize = 0; - RasterTiming = (ScanlinePairLength * 24); - RasterTimingOdd = 0; - RasterTimingEven = 0; - + s8 buffersize = 0; + RasterTiming = InitialTiming; + s32 timingadvance = InitialTiming; + bool abort = false; + //u32* RDLinesReg = &GPU.GPU3D.RDLines; ClearBuffers(); for (u8 quarter = 0; quarter < 4; quarter++) for (u8 bufferline = 0; bufferline < 48; bufferline += 2) { - RasterTiming += (ScanlineTimeout); + RasterTimingOdd = 0; + RasterTimingEven = 0; + + if (buffersize > 48) + { + RasterTiming = ScanlinePairLength * 23; + timingadvance = 0; + buffersize = 48; + } + if (!abort) RasterTiming += IncrementStrange; + else RasterTiming += ScanlineTimeout; - if (buffersize >= 50) RasterTiming = (ScanlinePairLength * 23) + ScanlineTimeout; + abort = RenderScanline(y, j, true); + abort = RenderScanline(y+1, j, false); - RenderScanline(y, j, true); - RenderScanline(y+1, j, false); - RasterTiming += ScanlineBreak; + buffersize += 2; + //RasterTiming += ScanlineBreak; + s32 timespent = std::max(RasterTimingOdd, RasterTimingEven); - u32* RDLinesReg = &GPU.GPU3D.RDLines; - *RDLinesReg = 0; - for (int i = RasterTiming; i > ScanlinePairLength / 2; i -= ScanlinePairLength / 2) *RDLinesReg += 1; + /*if (timespent > FreeTiming) + { + abort = true; + timespent -= FreeTiming; + } + else if (!abort) + { + abort = false; + timespent -= FreeTiming; + }*/ + //if (!abort) + timespent -= FreeTiming; + + if (timespent > 0) + { + RasterTiming -= timespent; + timingadvance -= timespent; + } + + if (timingadvance < 0) for (s32 i = (ScanlinePairLength / 2) * buffersize; i > RasterTiming + (ScanlinePairLength / 2); i -= ScanlinePairLength / 2) buffersize -= 1; + if (buffersize < 0) buffersize = 0; + // seems to display the lowest scanline buffer count reached during the current frame. // we also caps it to 46 here, because this reg does that too for some reason. - if (*RDLinesReg > GPU.GPU3D.RDLinesMin) *RDLinesReg = GPU.GPU3D.RDLinesMin; - else if (*RDLinesReg < GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLinesMin = *RDLinesReg; + if (quarter >= 1 && buffersize < GPU.GPU3D.RDLinesDisplay) GPU.GPU3D.RDLinesDisplay = buffersize; if (prevbufferline >= 0) { diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 5628d73e..01187a8a 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -455,8 +455,7 @@ private: melonDS::GPU& GPU; RendererPolygon PolygonList[2048]; bool DoTimings(s32 cycles, bool odd); - u32 DoTimingsPixels(u32 pixels, bool odd); - void EndScanline(bool odd); + u32 DoTimingsPixels(s32 pixels, bool odd); void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha); u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t); void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);