From 95faca402a0bab96677b3f10475c3cee67b7bff4 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 19 Apr 2024 06:16:32 -0400 Subject: [PATCH] accuracy toggle + some attempt at understanding slopes --- src/GPU3D_Soft.cpp | 189 +++++++++++++++++++++++++++++++-------------- src/GPU3D_Soft.h | 13 ++-- 2 files changed, 141 insertions(+), 61 deletions(-) diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 5da6cd78..da6acbc0 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -174,25 +174,71 @@ u32 SoftRenderer::DoTimingsPixels(s32 pixels, s32* timingcounter) else return 0; } -bool SoftRenderer::DoTimingsFirstPoly(RendererPolygon* rp, s32 y, s32* timingcounter) +void SoftRenderer::FindFirstPolyDoTimings(int npolys, s32 y, s32* timingcountereven, s32*timingcounterodd) { - // The first polygon in each scanline has an additional timing penalty (presumably due to pipelining?) + // TODO: actually figure this out - // First polygon has a cost of 4 cycles - if (!DoTimings(FirstPolyDelay, timingcounter)) return false; + // The First Polygon in each scanline pair has some additional timing penalties (presumably due to pipelining of the rasterizer) - // determine the timing impact of the first polygon's slopes. + bool fixeddelay = false; + bool perslope = false; + bool etc = false; + + for (int i = 0; i < npolys; i++) + { + RendererPolygon* rp = &PolygonList[i]; + Polygon* polygon = rp->PolyData; + + if (y >= polygon->YTop && y <= polygon->YBottom) + { + fixeddelay = true; + break; + if (y == polygon->YBottom) break; + if (y == polygon->YTop) {perslope = true; break;} + /*else if ((y == polygon->Vertices[rp->NextVL]->FinalPosition[1] || y == polygon->Vertices[rp->CurVL]->FinalPosition[1]) || + (y == polygon->Vertices[rp->NextVR]->FinalPosition[1] || y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) + { + perslope = true; + } + else */etc = true; + break; + } + } + + y++; + for (int i = 0; i < npolys; i++) + { + RendererPolygon* rp = &PolygonList[i]; + Polygon* polygon = rp->PolyData; + + if (y >= polygon->YTop && y <= polygon->YBottom) + { + fixeddelay = true; + break; + if (y == polygon->YBottom) break; + if (y == polygon->YTop) {perslope = true; break;} + /*else if ((y == polygon->Vertices[rp->NextVL]->FinalPosition[1] || y == polygon->Vertices[rp->CurVL]->FinalPosition[1]) || + (y == polygon->Vertices[rp->NextVR]->FinalPosition[1] || y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) + { + perslope = true; + } + else */etc = true; + break; + } + } - Polygon* polygon = rp->PolyData; - - if (polygon->YTop == polygon->YBottom) return true; // 0 px tall line polygons do not have slopes, and thus no timing penalty - if (y == polygon->YTop) return true; - - if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom) *timingcounter += FirstPerSlope; - - if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) *timingcounter += FirstPerSlope; - - return DoTimings(FirstPerSlope*2, timingcounter); // CHECKME: does this need to be done every time its incremented here? does this even need to be done *at all?* + *timingcountereven = fixeddelay*FirstPolyDelay;// + perslope*FirstPerSlope + etc*2; + *timingcounterodd = fixeddelay*FirstPolyDelay;/// + perslope*FirstPerSlope + etc*2; + if (!perslope) + { + *timingcountereven += etc*2;// + perslope*FirstPerSlope + etc*2; + *timingcounterodd += etc*2;/// + perslope*FirstPerSlope + etc*2; + } + else + { + *timingcountereven += perslope*FirstPerSlope;// + perslope*FirstPerSlope + etc*2; + *timingcounterodd += perslope*FirstPerSlope;/// + perslope*FirstPerSlope + etc*2; + } } void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const @@ -779,6 +825,7 @@ void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y) } } +template bool SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, s32* timingcounter) { Polygon* polygon = rp->PolyData; @@ -912,16 +959,20 @@ bool SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* s32 xlimit; if (xend > 256) xend = 256; - // determine if the span can be rendered within the time allotted to the scanline - // TODO: verify the timing characteristics of shadow masks are the same as regular polygons. - s32 diff = DoTimingsPixels(xend-x, timingcounter); - if (diff != 0) + if (accuracy) { - xend -= diff; - r_edgelen -= diff; - abortscanline = true; + // determine if the span can be rendered within the time allotted to the scanline + // TODO: verify the timing characteristics of shadow masks are the same as regular polygons. + s32 diff = DoTimingsPixels(xend-x, timingcounter); + if (diff != 0) + { + xend -= diff; + r_edgelen -= diff; + abortscanline = true; + } + else abortscanline = false; } - else abortscanline = false; + else abortscanline = true; // for shadow masks: set stencil bits where the depth test fails. // draw nothing. @@ -1007,6 +1058,7 @@ bool SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* return abortscanline; } +template bool SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, s32* timingcounter) { Polygon* polygon = rp->PolyData; @@ -1163,13 +1215,17 @@ bool SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s32 xcov = 0; if (xend > 256) xend = 256; - // determine if the span can be rendered within the time allotted to the scanline - s32 diff = DoTimingsPixels(xend-x, timingcounter); - if (diff != 0) + if (accuracy) { - xend -= diff; - r_edgelen -= diff; - abortscanline = true; + // determine if the span can be rendered within the time allotted to the scanline + s32 diff = DoTimingsPixels(xend-x, timingcounter); + if (diff != 0) + { + xend -= diff; + r_edgelen -= diff; + abortscanline = true; + } + else abortscanline = false; } else abortscanline = false; @@ -1461,45 +1517,35 @@ bool SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 return abortscanline; } +template void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys, s32* timingcounter) { - *timingcounter = 0; bool abort = false; - bool first = true; for (int i = 0; i < npolys; i++) { RendererPolygon* rp = &PolygonList[i]; Polygon* polygon = rp->PolyData; - if (y == polygon->YBottom && y != polygon->YTop) + if (accuracy && y == polygon->YBottom && y != polygon->YTop) { - if (!abort) abort = (first && !DoTimings(FirstNull+EmptyPolyScanline, timingcounter)) || !DoTimings(EmptyPolyScanline, timingcounter); - - first = false; + if (!abort) abort = !DoTimings(EmptyPolyScanline, timingcounter); } else if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop))) - { - //if (y == polygon->YTop) if(!DoTimings(FirstPolyScanline, timingcounter)) abort = true; - - if (!abort) abort = (first && !DoTimingsFirstPoly(rp, y, timingcounter)) // incorrect. needs research; behavior is strange... - || !DoTimings(PerPolyScanline, timingcounter) - || (!CheckTimings(MinToStartPoly, timingcounter)); + { + if (accuracy && !abort) abort = (!DoTimings(PerPolyScanline, timingcounter) + || !CheckTimings(MinToStartPoly, timingcounter)); - if (abort) + if (accuracy && abort) { CheckSlope(rp, y); Step(rp); } else if (polygon->IsShadowMask) - abort = RenderShadowMaskScanline(gpu.GPU3D, rp, y, timingcounter); + abort = RenderShadowMaskScanline(gpu.GPU3D, rp, y, timingcounter); else - abort = RenderPolygonScanline(gpu, rp, y, timingcounter); - - first = false; + abort = RenderPolygonScanline(gpu, rp, y, timingcounter); } } - - return; } u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const @@ -1904,8 +1950,9 @@ void SoftRenderer::FinishPushScanline(s32 y, s32 pixelsremain) /* update sl timeout */\ ScanlineTimeout = SLRead[y-1] - FinalPassLen;\ \ - RenderScanline(gpu, y, j, &rastertimingeven);\ - RenderScanline(gpu, y+1, j, &rastertimingodd);\ + FindFirstPolyDoTimings(j, y, &rastertimingeven, &rastertimingodd);\ + RenderScanline(gpu, y, j, &rastertimingeven);\ + RenderScanline(gpu, y+1, j, &rastertimingodd);\ \ prevtimespent = timespent;\ RasterTiming += timespent = std::max(std::initializer_list {rastertimingeven, rastertimingodd, FinalPassLen});\ @@ -1914,7 +1961,27 @@ void SoftRenderer::FinishPushScanline(s32 y, s32 pixelsremain) /* set the underflow flag if one of the scanlines came within 14 cycles of visible underflow */\ if (ScanlineTimeout <= RasterTiming) gpu.GPU3D.RDLinesUnderflow = true; -void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys) +void SoftRenderer::RenderPolygonsFast(GPU& gpu, Polygon** polygons, int npolys) +{ + int j = 0; + for (int i = 0; i < npolys; i++) + { + if (polygons[i]->Degenerate) continue; + SetupPolygon(&PolygonList[j++], polygons[i]); + } + int dummy; + RenderScanline(gpu, 0, j, &dummy); + + for (s32 y = 1; y < 192; y++) + { + RenderScanline(gpu, y, j, &dummy); + ScanlineFinalPass(gpu.GPU3D, y-1, true, true); + } + + ScanlineFinalPass(gpu.GPU3D, 191, true, true); +} + +void SoftRenderer::RenderPolygonsTiming(GPU& gpu, Polygon** polygons, int npolys) { int j = 0; for (int i = 0; i < npolys; i++) @@ -1932,10 +1999,10 @@ void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys) s32 nextread = 0, nextreadrd = 0; u32 timespent, prevtimespent; - + FindFirstPolyDoTimings(j, 0, &rastertimingeven, &rastertimingodd); // scanlines are rendered in pairs of two - RenderScanline(gpu, 0, j, &rastertimingeven); - RenderScanline(gpu, 1, j, &rastertimingodd); + RenderScanline(gpu, 0, j, &rastertimingeven); + RenderScanline(gpu, 1, j, &rastertimingodd); // it can't proceed to the next scanline unless all others steps are done (both scanlines in the pair, and final pass) RasterTiming = timespent = std::max(std::initializer_list {rastertimingeven, rastertimingodd, FinalPassLen}); @@ -2025,7 +2092,12 @@ void SoftRenderer::RenderFrame(GPU& gpu) ClearBuffers(gpu); if (gpu.GPU3D.RenderingEnabled >= 3) - RenderPolygons(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); + { + if (Accuracy) + RenderPolygonsTiming(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); + else + RenderPolygonsFast(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); + } else memcpy(FinalBuffer, ColorBuffer, sizeof(FinalBuffer)); } @@ -2062,7 +2134,12 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu) ClearBuffers(gpu); if (gpu.GPU3D.RenderingEnabled >= 3) - RenderPolygons(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); + { + if (Accuracy) + RenderPolygonsTiming(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); + else + RenderPolygonsFast(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); + } else { memcpy(FinalBuffer, ColorBuffer, sizeof(FinalBuffer)); diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 20aaf4bf..a9a15787 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -458,7 +458,7 @@ private: bool DoTimings(s32 cycles, s32* timingcounter); bool CheckTimings(s32 cycles, s32* timingcounter); u32 DoTimingsPixels(s32 pixels, s32* timingcounter); - bool DoTimingsFirstPoly(RendererPolygon* rp, s32 y, s32* timingcounter); + void FindFirstPolyDoTimings(int npolys, s32 y, s32* timingcountereven, s32*timingcounterodd); void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); @@ -467,9 +467,9 @@ private: void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; void Step(RendererPolygon* rp); void CheckSlope(RendererPolygon* rp, s32 y); - bool RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, s32* timingcounter); - bool RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, s32* timingcounter); - void RenderScanline(const GPU& gpu, s32 y, int npolys, s32* timingcounter); + template bool RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, s32* timingcounter); + template bool RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, s32* timingcounter); + template void RenderScanline(const GPU& gpu, s32 y, int npolys, s32* timingcounter); u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const; bool CheckEdgeMarkingPixel(u32 polyid, u32 z, u32 pixeladdr); bool CheckEdgeMarkingClearPlane(const GPU3D& gpu3d, u32 polyid, u32 z); @@ -478,7 +478,8 @@ private: u16 BeginPushScanline(s32 y, s32 pixelstodraw); void ReadScanline(s32 y); void FinishPushScanline(s32 y, s32 pixelsremain); - void RenderPolygons(GPU& gpu, Polygon** polygons, int npolys); + void RenderPolygonsFast(GPU& gpu, Polygon** polygons, int npolys); + void RenderPolygonsTiming(GPU& gpu, Polygon** polygons, int npolys); void RenderThreadFunc(GPU& gpu); @@ -532,6 +533,8 @@ private: bool FrameIdentical; + bool Accuracy = true; // TODO + // threading bool Threaded;