diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 1a879abf..e8ac23b4 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -221,6 +221,12 @@ void GPU3D::Reset() noexcept
     DispCnt = 0;
     AlphaRefVal = 0;
     AlphaRef = 0;
+
+    RDLines = 46;
+    RDLinesMin = 46;
+    RasterTimingCounterPrev = 0;
+    RasterTimingCounterOdd = 0;
+    RasterTimingCounterEven = 0;
 
     memset(ToonTable, 0, sizeof(ToonTable));
     memset(EdgeTable, 0, sizeof(EdgeTable));
@@ -770,7 +776,40 @@ void GPU3D::StallPolygonPipeline(s32 delay, s32 nonstalldelay) noexcept
     }
 }
 
+bool GPU3D::DoTimings(s32 cycles, bool odd) noexcept // charges `cycles` to the odd/even counter; returns true once RasterTimingCap is reached
+{
+    if (odd)
+    {
+        RasterTimingCounterOdd += cycles;
+        if ((RasterTimingCounterOdd + RasterTimingCounterPrev) < RasterTimingCap) return false;
+    }
+    else
+    {
+        RasterTimingCounterEven += cycles;
+        if ((RasterTimingCounterEven + RasterTimingCounterPrev) < RasterTimingCap) return false;
+    }
+    DispCnt |= (1<<12); // budget used up: latch bit 12 of DispCnt
+    return true;
+}
+
+void GPU3D::EndScanline(bool odd) noexcept // settles the timing budget once per scanline pair (does nothing while odd is true)
+{
+    if (!odd)
+    {
+        RasterTimingCounterPrev += std::max(RasterTimingCounterOdd, RasterTimingCounterEven); // the slower line of the pair is carried over
+        RasterTimingCounterPrev -= PerScanlineRecup; // wip
+        if (RasterTimingCounterPrev < 0) RasterTimingCounterPrev = 0;
+        // calc is wrong, seems to round up...?
+        RDLines = (RasterTimingCap - RasterTimingCounterPrev) / PerScanlineTiming;
+        // the register seems to display the lowest scanline buffer count reached during the current frame.
+        // we also cap it to 46 here (RDLinesMin is reset to 46 in VCount144), because this reg does that too for some reason.
+        if (RDLines > RDLinesMin) RDLines = RDLinesMin;
+        if (RDLines < RDLinesMin) RDLinesMin = RDLines;
+        RasterTimingCounterOdd = 0;
+        RasterTimingCounterEven = 0;
+    }
+}
 
 
 template
 void ClipSegment(Vertex* outbuf, Vertex* vin, Vertex* vout)
@@ -2369,6 +2408,10 @@ void GPU3D::CheckFIFODMA() noexcept
 
 void GPU3D::VCount144() noexcept
 {
+    RDLinesMin = 46;
+    RasterTimingCounterPrev = 0;
+    RasterTimingCounterOdd = 0;
+    RasterTimingCounterEven = 0;
     CurrentRenderer->VCount144();
 }
 
@@ -2612,7 +2655,7 @@ u16 GPU3D::Read16(u32 addr) noexcept
         return DispCnt;
 
     case 0x04000320:
-        return 46; // TODO, eventually
+        return RDLines; // value tracked by EndScanline(), no longer a fixed 46
 
     case 0x04000600:
         {
@@ -2656,7 +2699,7 @@ u32 GPU3D::Read32(u32 addr) noexcept
         return DispCnt;
 
     case 0x04000320:
-        return 46; // TODO, eventually
+        return RDLines; // value tracked by EndScanline(), no longer a fixed 46
 
     case 0x04000600:
         {
diff --git a/src/GPU3D.h b/src/GPU3D.h
index dda78b78..924344f7 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -25,10 +25,20 @@
 #include "Savestate.h"
 #include "FIFO.h"
 
+
 namespace melonDS
 {
 class GPU;
 
+// rasterizer timing budget; numbers based on 339 poly 64-172 horiz. line poly
+static constexpr int RasterTimingCap = 51116;
+static constexpr int PerPolyTiming = 12;
+static constexpr int PerScanlineTiming = 1064;
+static constexpr int PerScanlineRecup = 2010;//1910;
+//static constexpr int EmptyPolyScanline;
+//static constexpr int FirstPixelTiming;
+static constexpr int PerPixelTiming = 1;
+
 struct Vertex
 {
     s32 Position[4];
@@ -114,6 +124,9 @@ public:
 
     void WriteToGXFIFO(u32 val) noexcept;
 
+    bool DoTimings(s32 cycles, bool odd) noexcept; // true once the scanline timing budget is exhausted
+    void EndScanline(bool odd) noexcept; // updates RDLines after the second line of a scanline pair
+
     [[nodiscard]] bool IsRendererAccelerated() const noexcept;
     [[nodiscard]] Renderer3D& GetCurrentRenderer() noexcept { return *CurrentRenderer; }
     [[nodiscard]] const Renderer3D& GetCurrentRenderer() const noexcept { return *CurrentRenderer; }
@@ -126,6 +139,7 @@ public:
     void Write16(u32 addr, u16 val) noexcept;
     void Write32(u32 addr, u32 val) noexcept;
     void Blit() noexcept;
+
 private:
     melonDS::NDS& NDS;
     typedef union
@@ -242,6 +256,11 @@ public:
     bool RenderingEnabled = false;
 
     u32 DispCnt = 0;
+    u32 RDLines = 0;
+    u32 RDLinesMin = 0;
+    s32 RasterTimingCounterPrev = 0;
+    s32 RasterTimingCounterOdd = 0;
+    s32 RasterTimingCounterEven = 0;
     u8 AlphaRefVal = 0;
     u8 AlphaRef = 0;
 
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index 03c6265e..1061228e 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -900,10 +900,11 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
     rp->XR = rp->SlopeR.Step();
 }
 
-void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
+void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
 {
+    if (GPU.GPU3D.DoTimings(PerPolyTiming, odd)) return; // NOTE(review): if this function steps the polygon edges at its end (as RenderShadowMaskScanline does), this early return skips that - confirm
+    int pixelsrendered = 0; // the first 4 pixels of a polygon line are not charged to the budget
     Polygon* polygon = rp->PolyData;
-
     u32 polyattr = (polygon->Attr & 0x3F008000);
     if (!polygon->FacingView) polyattr |= (1<<4);
 
@@ -1076,10 +1077,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
         if (xcov == 0x3FF) xcov = 0;
     }
 
-    if (!l_filledge) x = xlimit;
-    else
     for (; x < xlimit; x++)
     {
+        if (pixelsrendered >= 4 && GPU.GPU3D.DoTimings(PerPixelTiming, odd)) return;
+        pixelsrendered++;
+        if (!l_filledge) continue; // unfilled edge pixels still cost time, they are just not drawn
         u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
         u32 dstattr = AttrBuffer[pixeladdr];
 
@@ -1172,10 +1174,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     if (xlimit > xend+1) xlimit = xend+1;
     if (xlimit > 256) xlimit = 256;
 
-    if (wireframe && !edge) x = std::max(x, xlimit);
-    else
     for (; x < xlimit; x++)
     {
+        if (pixelsrendered >= 4 && GPU.GPU3D.DoTimings(PerPixelTiming, odd)) return;
+        pixelsrendered++;
+        if (wireframe && !edge) continue; // wireframe interior pixels still cost time, they are just not drawn
         u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
         u32 dstattr = AttrBuffer[pixeladdr];
 
@@ -1265,9 +1268,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
         if (xcov == 0x3FF) xcov = 0;
     }
 
-    if (r_filledge)
     for (; x < xlimit; x++)
     {
+        if (pixelsrendered >= 4 && GPU.GPU3D.DoTimings(PerPixelTiming, odd)) return;
+        pixelsrendered++;
+        if (!r_filledge) continue; // unfilled edge pixels still cost time, they are just not drawn
         u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
         u32 dstattr = AttrBuffer[pixeladdr];
 
@@ -1360,8 +1365,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
 
 void SoftRenderer::RenderScanline(s32 y, int npolys)
 {
+    bool odd = !(y % 2); // NOTE: true for EVEN y (0, 2, ...), i.e. the first line of each pair; EndScanline() settles the pair when this is false
     for (int i = 0; i < npolys; i++)
     {
+        if (GPU.GPU3D.DoTimings(0, odd)) break; // zero-cost query: stop once this line's budget is already spent
+
         RendererPolygon* rp = &PolygonList[i];
         Polygon* polygon = rp->PolyData;
 
@@ -1370,9 +1378,10 @@ void SoftRenderer::RenderScanline(s32 y, int npolys)
             if (polygon->IsShadowMask)
                 RenderShadowMaskScanline(rp, y);
             else
-                RenderPolygonScanline(rp, y);
+                RenderPolygonScanline(rp, y, odd);
         }
     }
+    GPU.GPU3D.EndScanline(odd);
 }
 
 u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr)
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index 2f5664e2..e5cd44eb 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -461,7 +461,7 @@ private:
     void SetupPolygonRightEdge(RendererPolygon* rp, s32 y);
     void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
     void RenderShadowMaskScanline(RendererPolygon* rp, s32 y);
-    void RenderPolygonScanline(RendererPolygon* rp, s32 y);
+    void RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd);
     void RenderScanline(s32 y, int npolys);
     u32 CalculateFogDensity(u32 pixeladdr);
     void ScanlineFinalPass(s32 y);
@@ -476,14 +476,17 @@ private:
     // TODO: check if the hardware can accidentally plot pixels
     // offscreen in that border
 
-    static constexpr int ScanlineWidth = 258;
-    static constexpr int NumScanlines = 194;
+    static constexpr int ScanlineWidth = 256; // NOTE(review): border removed, but FirstPixelOffset is still ScanlineWidth + 1, so pixel (255,191) would map to index 49408 >= BufferSize (49152) - confirm indexing
+    static constexpr int NumScanlines = 192;
+    static constexpr int NumScanlinesRDLines = 192;
+    static constexpr int RDLinesBufferSize = ScanlineWidth * NumScanlinesRDLines;
     static constexpr int BufferSize = ScanlineWidth * NumScanlines;
     static constexpr int FirstPixelOffset = ScanlineWidth + 1;
 
-    u32 ColorBuffer[BufferSize * 2];
-    u32 DepthBuffer[BufferSize * 2];
-    u32 AttrBuffer[BufferSize * 2];
+    u32 ColorBuffer[RDLinesBufferSize * 2];
+    u32 DepthBuffer[RDLinesBufferSize * 2];
+    u32 AttrBuffer[RDLinesBufferSize * 2];
+    u32 FinalBuffer[BufferSize * 2];
 
     // attribute buffer:
     // bit0-3: edge flags (left/right/top/bottom)