From 9ee9389ee189fa4e08afaec5594754adb3271353 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 17 Mar 2024 14:00:49 -0400 Subject: [PATCH] attempt to optimize quotient/remainder calc --- src/GPU3D_Soft.cpp | 28 ++++++++++++++-------------- src/GPU3D_Soft.h | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 74027d5b..c92af7ab 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -615,7 +615,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1], - polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); + polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y, polygon->WBuffer); } void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const @@ -642,7 +642,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1], - polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); + polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer); } void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const @@ -748,8 +748,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -834,7 +834,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* int edge; s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr); if (x < 0) x = 0; s32 xlimit; @@ -856,7 +856,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -882,7 +882,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -908,7 +908,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -973,8 +973,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -1084,7 +1084,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 int edge; s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr); if (x < 0) x = 0; s32 xlimit; @@ -1123,7 +1123,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1219,7 +1219,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1311,7 +1311,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 6406ffba..4aa5e77f 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -69,23 +69,33 @@ private: { public: constexpr Interpolator() {} - constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) + constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0, s32 z1) { - Setup(x0, x1, w0, w1); + Setup(x0, x1, w0, w1, wbuffer, z0, z1); } - constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1) + constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0 = 0, s32 z1 = 0) { this->x0 = x0; this->x1 = x1; this->xdiff = x1 - x0; + this->wbuffer = wbuffer; - // calculate reciprocal for Z interpolation - // TODO eventually: use a faster reciprocal function? - if (this->xdiff != 0) - this->xrecip_z = (1<<22) / this->xdiff; - else - this->xrecip_z = 0; + // calculate quotient and remainder for Z interpolation + if (!dir && !wbuffer && xdiff != 0) + { + if (z0 < z1) + { + // remainder is unused for this path + this->zquo = ((z1 - z0) >> 1) / xdiff; + } + else + { + // should optimize down to one divide instruction + this->zquo = ((z0 - z1) >> 1) / xdiff; + this->zrem = ((z0 - z1) >> 1) % xdiff; + } + } // linear mode is used if both W values are equal and have // low-order bits cleared (0-6 along X, 1-6 along Y) @@ -164,7 +174,7 @@ private: } } - constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const + constexpr s32 InterpolateZ(s32 z0, s32 z1) const { if (xdiff == 0 || z0 == z1) return z0; @@ -194,9 +204,9 @@ private: { // these algorithms are weiiird but i can't argue with the results if (z0 < z1) - return z0 + ((z1-z0 >> 1) / xdiff * x << 1); + return z0 + ((zquo * x) << 1); else - return z1 + (((z0-z1 >> 1) / xdiff * (xdiff-x)) + ((z0-z1 >> 1) % xdiff) << 1); + return z1 + ((zquo * (xdiff-x) + zrem) << 1); } } } @@ -206,8 +216,9 @@ private: int shift; bool linear; + bool wbuffer; - s32 xrecip_z; + s32 zquo, zrem; s32 w0n, w0d, w1d; u32 yfactor; @@ -231,7 +242,7 @@ private: Increment = 0; XMajor = false; - Interp.Setup(0, 0, 0, 0); + Interp.Setup(0, 0, 0, 0, false); Interp.SetX(0); xcov_incr = 0; @@ -239,7 +250,7 @@ private: return x0; } - constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) + constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y, bool wbuffer) { this->x0 = x0; this->y = y; @@ -305,7 +316,7 @@ private: s32 x = XVal(); int interpoffset = (Increment >= 0x40000) && (side ^ Negative); - Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); + Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1, wbuffer); Interp.SetX(y); // used for calculating AA coverage