From 2bf033e0bcc70026524a8a75cb2bf48c7a3ea496 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 10 Dec 2023 18:41:17 -0500 Subject: [PATCH] optimize per pixel timing counting --- src/GPU3D.h | 16 ++++----- src/GPU3D_Soft.cpp | 88 ++++++++++++++++++++++++++++++---------------- src/GPU3D_Soft.h | 3 +- 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/src/GPU3D.h b/src/GPU3D.h index 8c4b22ca..284e101f 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -332,14 +332,14 @@ public: }; // numbers based on 339 poly 64-172 horiz. line poly - static constexpr int Frac = 481; // add a fractional component if pixels is not enough precision - static constexpr int RasterTimingCap = 51116*Frac; - static constexpr int PerScanlineTiming = 1064*Frac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED - static constexpr int PerScanlineRecup = 2112*Frac; // seems to check out? - static constexpr int PerRightSlope = 1*Frac; - static constexpr int PerPolyTiming = 12*Frac; // should be correct for *most* line polygons and polygons with vertical slopes - static constexpr int PerPixelTiming = 1*Frac; // does not apply to the first 4 pixels in a polygon (per scanline?) - static constexpr int EmptyPolyScanline = 4*Frac - 14; // seems to be slightly under 4? + static constexpr int RasterFrac = 481; // add a fractional component if pixels is not enough precision + static constexpr int RasterTimingCap = 51116*RasterFrac; + static constexpr int PerScanlineTiming = 1064*RasterFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED + static constexpr int PerScanlineRecup = 2112*RasterFrac; // seems to check out? 
+ static constexpr int PerRightSlope = 1*RasterFrac; + static constexpr int PerPolyTiming = 12*RasterFrac; // should be correct for *most* line polygons and polygons with vertical slopes + static constexpr int PerPixelTiming = 1*RasterFrac; // does not apply to the first 4 pixels in a polygon (per scanline?) + static constexpr int EmptyPolyScanline = 4*RasterFrac - 14; // seems to be slightly under 4? //static constexpr int FirstPixelTiming; class Renderer3D diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 4aaf00ea..306d7db1 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -129,6 +129,29 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd) return true; } +s32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd) +{ + // returns how many pixels must be cut from the end of the span (0 if it fits); the first 4 pixels are free + // NOTE(review): this only reads the timing counters and never advances them, confirm that is intended + if (pixels <= 4) return 0; + + // signed math: the old u32 version wrapped on overrun, so its "> 0" test also passed for every overrun + s32 pixeltiming = (pixels - 4) * RasterFrac; + s32 rasterend; + if (odd) rasterend = RasterTimingCap - (RasterTimingCounterOdd + RasterTimingCounterPrev); + else rasterend = RasterTimingCap - (RasterTimingCounterEven + RasterTimingCounterPrev); + + // enough time left for every timed pixel + if (rasterend - pixeltiming > 0) return 0; + + // out of time: set DispCnt bit 12 (presumably the DISP3DCNT RDLINES underflow flag) and shorten the span + GPU.GPU3D.DispCnt |= (1<<12); + if (rasterend < 0) rasterend = 0; + + // pixels that still fit = the 4 free pixels plus the remaining time rounded up to whole pixels + return pixels - (((rasterend + (RasterFrac-1)) / RasterFrac) + 4); +} + void SoftRenderer::EndScanline(bool odd) { if (!odd) @@ -707,11 +730,10 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly } } -bool SoftRenderer::Step(RendererPolygon* rp, bool abortscanline) +void SoftRenderer::Step(RendererPolygon* rp) { rp->XL = rp->SlopeL.Step(); rp->XR = rp->SlopeR.Step(); - return abortscanline; } void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y) @@ -981,7 +1003,11 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd) CheckSlope(rp, y); - if (DoTimings(PerPolyTiming, odd)) return Step(rp, true); + if (DoTimings(PerPolyTiming, odd)) + { + Step(rp); + return true; + } Vertex 
*vlcur, *vlnext, *vrcur, *vrnext; s32 xstart, xend; @@ -990,7 +1016,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd) s32 l_edgecov, r_edgecov; Interpolator<1>* interp_start; Interpolator<1>* interp_end; - u16 pixelsrendered = 0; // for tracking timings + bool abortscanline = false; // to abort the rest of the scanline after finishing this polygon xstart = rp->XL; xend = rp->XR; @@ -1109,31 +1135,38 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd) int edge; s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + xend += 1; + Interpolator<0> interpX(xstart, xend, wl, wr); if (x < 0) x = 0; s32 xlimit; s32 xcov = 0; + if (xend > 256) xend = 256; + + // determine if the span can be rendered within the time allotted to the scanline + s32 diff = DoTimingsPixels(xend-x, odd); + if (diff != 0) + { + xend -= diff; + r_edgelen -= diff; + abortscanline = true; + } // part 1: left edge edge = yedge | 0x1; xlimit = xstart+l_edgelen; - if (xlimit > xend+1) xlimit = xend+1; - if (xlimit > 256) xlimit = 256; + if (xlimit > xend) xlimit = xend; if (l_edgecov & (1<<31)) { xcov = (l_edgecov >> 12) & 0x3FF; if (xcov == 0x3FF) xcov = 0; } - - for (; x < xlimit; x++) + + if (!l_filledge) x = xlimit; + else for (; x < xlimit; x++) { - if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true); - pixelsrendered++; - - if (!l_filledge) continue; u32 pixeladdr = (BufferOffset * ScanlineWidth) + x; u32 dstattr = AttrBuffer[pixeladdr]; @@ -1223,16 +1256,12 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd) // part 2: polygon inside edge = yedge; - xlimit = xend-r_edgelen+1; - if (xlimit > xend+1) xlimit = xend+1; - if (xlimit > 256) xlimit = 256; - - for (; x < xlimit; x++) + xlimit = xend-r_edgelen; + if (xlimit > xend) xlimit = xend; + + if (wireframe && !edge) x = std::max(x, xlimit); + else for (; x < xlimit; x++) { - if (pixelsrendered >= 4 && 
DoTimings(PerPixelTiming, odd)) return Step(rp, true); - pixelsrendered++; - - if (wireframe && !edge) continue; u32 pixeladdr = (BufferOffset * ScanlineWidth) + x; u32 dstattr = AttrBuffer[pixeladdr]; @@ -1315,20 +1344,16 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd) // part 3: right edge edge = yedge | 0x2; - xlimit = xend+1; - if (xlimit > 256) xlimit = 256; + xlimit = xend; if (r_edgecov & (1<<31)) { xcov = (r_edgecov >> 12) & 0x3FF; if (xcov == 0x3FF) xcov = 0; } - //if (rp->SlopeR.Increment != 0 && DoTimings(PerRightSlope, odd)) return Step(rp, true); // should be fine to not immediately return? might be wrong + + if (r_filledge) for (; x < xlimit; x++) { - if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true); - pixelsrendered++; - - if (!r_filledge) continue; u32 pixeladdr = (BufferOffset * ScanlineWidth) + x; u32 dstattr = AttrBuffer[pixeladdr]; @@ -1415,7 +1440,8 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd) PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); } } - return Step(rp, false); + Step(rp); + return abortscanline; } bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd) @@ -1429,7 +1455,7 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd) if (abort) { CheckSlope(rp, y); - Step(rp, NULL); + Step(rp); } else if (y == polygon->YBottom && y != polygon->YTop) { diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index d9b925f3..43037281 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -455,6 +455,7 @@ private: melonDS::GPU& GPU; RendererPolygon PolygonList[2048]; bool DoTimings(s32 cycles, bool odd); + s32 DoTimingsPixels(u32 pixels, bool odd); void EndScanline(bool odd); void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha); u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t); @@ -462,7 +463,7 @@ private: void SetupPolygonLeftEdge(RendererPolygon* rp, 
s32 y); void SetupPolygonRightEdge(RendererPolygon* rp, s32 y); void SetupPolygon(RendererPolygon* rp, Polygon* polygon); - bool Step(RendererPolygon* rp, bool abortscanline); + void Step(RendererPolygon* rp); void CheckSlope(RendererPolygon* rp, s32 y); void RenderShadowMaskScanline(RendererPolygon* rp, s32 y); bool RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd);