From f4bae5a5d5e5e11267660affd524f345cf9abdc7 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 10 Feb 2024 17:39:40 -0500 Subject: [PATCH 01/13] best approximation so far --- src/GPU3D_Soft.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 9cfdf9ad..51a87339 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -194,22 +194,23 @@ private: disp = z0 - z1, factor = xdiff - x; } - + /* if (dir) { - int shift = 0; - while (disp > 0x3FF) + return base + disp * factor / xdiff; + } + else*/ + { + u32 recip, recip2; + u32 shift = 0; + recip2 = recip = (factor << 16) / xdiff; + while (recip2 > 0x100) { - disp >>= 1; + recip2 >>= 1; shift++; } - - return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift); - } - else - { - disp >>= 9; - return base + (((s64)disp * factor * xrecip_z) >> 13); + disp >>= shift; + return base + ((disp * recip) >> (16 - shift)); } } } From 2c457de681c9c895e02c463c37e5098c5165b485 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 10 Feb 2024 20:54:35 -0500 Subject: [PATCH 02/13] rework to actually work --- src/GPU3D_Soft.h | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 51a87339..58313800 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -180,37 +180,40 @@ private: { // Z-buffering: linear interpolation // still doesn't quite match hardware... - s32 base = 0, disp = 0, factor = 0; + s32 disp = 0; if (z0 < z1) { - base = z0; disp = z1 - z0; - factor = x; } else { - base = z1; - disp = z0 - z1, - factor = xdiff - x; + disp = z0 - z1; } - /* - if (dir) + + /*if (dir) { - return base + disp * factor / xdiff; + if (z0 < z1) return z0 + ((z1 - z0) * x / xdiff); + else return z1 + ((z0 - z1) - ((z0 - z1) * x / xdiff)); + }*/ + + u32 recip, recip2; + u32 shift = 0; + recip2 = recip = (x << 16) / xdiff; + while (recip2 > 0x100) + { + recip2 >>= 1; + shift++; } - else*/ + disp >>= shift; + + if (z0 < z1) { - u32 recip, recip2; - u32 shift = 0; - recip2 = recip = (factor << 16) / xdiff; - while (recip2 > 0x100) - { - recip2 >>= 1; - shift++; - } - disp >>= shift; - return base + ((disp * recip) >> (16 - shift)); + return z0 + ((disp * recip) >> (16 - shift)); + } + else + { + return z1 + ((z0-z1) - ((disp * recip) >> (16 - shift))); } } } From 668e5580e0956815e85589d6136dca1429201475 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 16 Mar 2024 06:51:07 -0400 Subject: [PATCH 03/13] better approximation --- src/GPU3D_Soft.h | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 58313800..e3e89b66 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -180,41 +180,11 @@ private: { // Z-buffering: linear interpolation // still doesn't quite match hardware... - s32 disp = 0; if (z0 < z1) - { - disp = z1 - z0; - } + return z0 + (((z1 - z0) / (xdiff << 1)) * (x<<1)); else - { - disp = z0 - z1; - } - - /*if (dir) - { - if (z0 < z1) return z0 + ((z1 - z0) * x / xdiff); - else return z1 + ((z0 - z1) - ((z0 - z1) * x / xdiff)); - }*/ - - u32 recip, recip2; - u32 shift = 0; - recip2 = recip = (x << 16) / xdiff; - while (recip2 > 0x100) - { - recip2 >>= 1; - shift++; - } - disp >>= shift; - - if (z0 < z1) - { - return z0 + ((disp * recip) >> (16 - shift)); - } - else - { - return z1 + ((z0-z1) - ((disp * recip) >> (16 - shift))); - } + return z1 + (((z0 - z1) / (xdiff << 1)) * (xdiff-x<<1)); } } From 672e6d03faa93b38eca455da4d1bb00e39ee5b1d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 16 Mar 2024 12:09:47 -0400 Subject: [PATCH 04/13] betterer approximation --- src/GPU3D_Soft.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index e3e89b66..06d132b0 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -181,10 +181,22 @@ private: // Z-buffering: linear interpolation // still doesn't quite match hardware... - if (z0 < z1) - return z0 + (((z1 - z0) / (xdiff << 1)) * (x<<1)); + if (dir) + { + // seems like y dir does different interpolation? + // this probably isn't right... + if (z0 < z1) + return z0 + (z1-z0) * x / xdiff; + else + return z1 + (z0-z1) * (xdiff-x) / xdiff; + } else - return z1 + (((z0 - z1) / (xdiff << 1)) * (xdiff-x<<1)); + { + if (z0 < z1) + return z0 + (((z1-z0) / xdiff & ~0x1) * x); + else + return z1 + (((z0-z1) / xdiff & ~0x1) * (xdiff-x) + ((z0-z1) & 0xFF)); + } } } From b6fa43b0cffb589165283600b14d682e935bb828 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 16 Mar 2024 20:17:24 -0400 Subject: [PATCH 05/13] implement approximation of z0 > z1 case --- src/GPU3D_Soft.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 06d132b0..989cebf3 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -183,7 +183,7 @@ private: if (dir) { - // seems like y dir does different interpolation? + // seems like y dir does different interpolation than x? // this probably isn't right... if (z0 < z1) return z0 + (z1-z0) * x / xdiff; @@ -195,7 +195,7 @@ private: if (z0 < z1) return z0 + (((z1-z0) / xdiff & ~0x1) * x); else - return z1 + (((z0-z1) / xdiff & ~0x1) * (xdiff-x) + ((z0-z1) & 0xFF)); + return z1 + (((z0-z1) / xdiff & ~0x1) * (xdiff-x)) + ((z0-z1) % (xdiff << 1)); } } } From ee91d7f8f31c927d4a8f9767a2ead971a42c58bc Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 17 Mar 2024 10:42:52 -0400 Subject: [PATCH 06/13] notes + optimization should only compile to one div instruction per path now --- src/GPU3D_Soft.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 989cebf3..6406ffba 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -179,23 +179,24 @@ private: else { // Z-buffering: linear interpolation - // still doesn't quite match hardware... + // not perfect, but close if (dir) { - // seems like y dir does different interpolation than x? - // this probably isn't right... + // interpolating along y uses a different algorithm than x + // this algo probably isn't quite right though... if (z0 < z1) - return z0 + (z1-z0) * x / xdiff; + return z0 + (s64)(z1-z0) * x / xdiff; else - return z1 + (z0-z1) * (xdiff-x) / xdiff; + return z1 + (s64)(z0-z1) * (xdiff-x) / xdiff; } else { + // these algorithms are weiiird but i can't argue with the results if (z0 < z1) - return z0 + (((z1-z0) / xdiff & ~0x1) * x); + return z0 + ((z1-z0 >> 1) / xdiff * x << 1); else - return z1 + (((z0-z1) / xdiff & ~0x1) * (xdiff-x)) + ((z0-z1) % (xdiff << 1)); + return z1 + (((z0-z1 >> 1) / xdiff * (xdiff-x)) + ((z0-z1 >> 1) % xdiff) << 1); } } } From 9ee9389ee189fa4e08afaec5594754adb3271353 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 17 Mar 2024 14:00:49 -0400 Subject: [PATCH 07/13] attempt to optimize quotient/remainder calc --- src/GPU3D_Soft.cpp | 28 ++++++++++++++-------------- src/GPU3D_Soft.h | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 74027d5b..c92af7ab 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -615,7 +615,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1], - polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); + polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y, polygon->WBuffer); } void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const @@ -642,7 +642,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1], - polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); + polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer); } void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const @@ -748,8 +748,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -834,7 +834,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* int edge; s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr); if (x < 0) x = 0; s32 xlimit; @@ -856,7 +856,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -882,7 +882,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -908,7 +908,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -973,8 +973,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -1084,7 +1084,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 int edge; s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr); if (x < 0) x = 0; s32 xlimit; @@ -1123,7 +1123,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1219,7 +1219,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1311,7 +1311,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 6406ffba..4aa5e77f 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -69,23 +69,33 @@ private: { public: constexpr Interpolator() {} - constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) + constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0, s32 z1) { - Setup(x0, x1, w0, w1); + Setup(x0, x1, w0, w1, wbuffer, z0, z1); } - constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1) + constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0 = 0, s32 z1 = 0) { this->x0 = x0; this->x1 = x1; this->xdiff = x1 - x0; + this->wbuffer = wbuffer; - // calculate reciprocal for Z interpolation - // TODO eventually: use a faster reciprocal function? - if (this->xdiff != 0) - this->xrecip_z = (1<<22) / this->xdiff; - else - this->xrecip_z = 0; + // calculate quotient and remainder for Z interpolation + if (!dir && !wbuffer && xdiff != 0) + { + if (z0 < z1) + { + // remainder is unused for this path + this->zquo = ((z1 - z0) >> 1) / xdiff; + } + else + { + // should optimize down to one divide instruction + this->zquo = ((z0 - z1) >> 1) / xdiff; + this->zrem = ((z0 - z1) >> 1) % xdiff; + } + } // linear mode is used if both W values are equal and have // low-order bits cleared (0-6 along X, 1-6 along Y) @@ -164,7 +174,7 @@ private: } } - constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const + constexpr s32 InterpolateZ(s32 z0, s32 z1) const { if (xdiff == 0 || z0 == z1) return z0; @@ -194,9 +204,9 @@ private: { // these algorithms are weiiird but i can't argue with the results if (z0 < z1) - return z0 + ((z1-z0 >> 1) / xdiff * x << 1); + return z0 + ((zquo * x) << 1); else - return z1 + (((z0-z1 >> 1) / xdiff * (xdiff-x)) + ((z0-z1 >> 1) % xdiff) << 1); + return z1 + ((zquo * (xdiff-x) + zrem) << 1); } } } @@ -206,8 +216,9 @@ private: int shift; bool linear; + bool wbuffer; - s32 xrecip_z; + s32 zquo, zrem; s32 w0n, w0d, w1d; u32 yfactor; @@ -231,7 +242,7 @@ private: Increment = 0; XMajor = false; - Interp.Setup(0, 0, 0, 0); + Interp.Setup(0, 0, 0, 0, false); Interp.SetX(0); xcov_incr = 0; @@ -239,7 +250,7 @@ private: return x0; } - constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) + constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y, bool wbuffer) { this->x0 = x0; this->y = y; @@ -305,7 +316,7 @@ private: s32 x = XVal(); int interpoffset = (Increment >= 0x40000) && (side ^ Negative); - Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); + Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1, wbuffer); Interp.SetX(y); // used for calculating AA coverage From c5b9c3d36de4a72701dcd76e2256e95f3448fffb Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 29 Mar 2024 13:38:12 -0400 Subject: [PATCH 08/13] optimization attempt --- src/GPU3D_Soft.h | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 4aa5e77f..9e1e2c13 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -87,13 +87,16 @@ private: if (z0 < z1) { // remainder is unused for this path - this->zquo = ((z1 - z0) >> 1) / xdiff; + this->zquo = ((z1 - z0) >> 1) / xdiff << 1; + this->zcounter = z0; } else { // should optimize down to one divide instruction - this->zquo = ((z0 - z1) >> 1) / xdiff; - this->zrem = ((z0 - z1) >> 1) % xdiff; + this->zquo = ((z0 - z1) >> 1) / xdiff << 1; + s32 rem = ((z0 - z1) >> 1) % xdiff << 1; + s32 idk = zquo * xdiff; + this->zcounter = z1 + idk + rem; } } @@ -154,7 +157,7 @@ private: constexpr s32 Interpolate(s32 y0, s32 y1) const { - if (xdiff == 0 || y0 == y1) return y0; + if (x == 0 || y0 == y1) return y0; if (!linear) { @@ -174,9 +177,9 @@ private: } } - constexpr s32 InterpolateZ(s32 z0, s32 z1) const + constexpr s32 InterpolateZ(s32 z0, s32 z1) { - if (xdiff == 0 || z0 == z1) return z0; + if (x == 0 || z0 == z1) return z0; if (wbuffer) { @@ -204,9 +207,15 @@ private: { // these algorithms are weiiird but i can't argue with the results if (z0 < z1) - return z0 + ((zquo * x) << 1); + { + zcounter += zquo; + return zcounter; + } else - return z1 + ((zquo * (xdiff-x) + zrem) << 1); + { + zcounter -= zquo; + return zcounter; + } } } } @@ -218,7 +227,8 @@ private: bool linear; bool wbuffer; - s32 zquo, zrem; + s32 zquo; + s32 zcounter; s32 w0n, w0d, w1d; u32 yfactor; From 8f450faa5657a1a0f0a7f89c68b13fddb29988eb Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 29 Mar 2024 14:06:52 -0400 Subject: [PATCH 09/13] idk --- src/GPU3D_Soft.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 9e1e2c13..3962f5c7 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -95,8 +95,7 @@ private: // should optimize down to one divide instruction this->zquo = ((z0 - z1) >> 1) / xdiff << 1; s32 rem = ((z0 - z1) >> 1) % xdiff << 1; - s32 idk = zquo * xdiff; - this->zcounter = z1 + idk + rem; + this->zcounter = z1 + (zquo * xdiff) + rem; } } From 3f4221560225898df9719ee3f8742f97defc2fb7 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 30 Mar 2024 08:05:47 -0400 Subject: [PATCH 10/13] ok this makes a *lot* more sense --- src/GPU3D_Soft.h | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 3962f5c7..d19db7e4 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -84,19 +84,9 @@ private: // calculate quotient and remainder for Z interpolation if (!dir && !wbuffer && xdiff != 0) { - if (z0 < z1) - { - // remainder is unused for this path - this->zquo = ((z1 - z0) >> 1) / xdiff << 1; - this->zcounter = z0; - } - else - { - // should optimize down to one divide instruction - this->zquo = ((z0 - z1) >> 1) / xdiff << 1; - s32 rem = ((z0 - z1) >> 1) % xdiff << 1; - this->zcounter = z1 + (zquo * xdiff) + rem; - } + // remainder is unused for this path + this->zquo = ((z1 - z0) >> 1) / xdiff << 1; + this->zcounter = z0; } // linear mode is used if both W values are equal and have @@ -204,17 +194,7 @@ private: } else { - // these algorithms are weiiird but i can't argue with the results - if (z0 < z1) - { - zcounter += zquo; - return zcounter; - } - else - { - zcounter -= zquo; - return zcounter; - } + return zcounter += zquo; } } } From fd650cf1334531aad36d8d27312a668ec9d33972 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:23:41 -0400 Subject: [PATCH 11/13] minor clean up i forgot to do --- src/GPU3D_Soft.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index d19db7e4..ddb3f7b2 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -81,11 +81,10 @@ private: this->xdiff = x1 - x0; this->wbuffer = wbuffer; - // calculate quotient and remainder for Z interpolation + // calculate increment and init counter for Z interpolation if (!dir && !wbuffer && xdiff != 0) { - // remainder is unused for this path - this->zquo = ((z1 - z0) >> 1) / xdiff << 1; + this->zincr = ((z1 - z0) >> 1) / xdiff << 1; this->zcounter = z0; } @@ -194,7 +193,7 @@ private: } else { - return zcounter += zquo; + return zcounter += zincr; } } } @@ -206,7 +205,7 @@ private: bool linear; bool wbuffer; - s32 zquo; + s32 zincr; s32 zcounter; s32 w0n, w0d, w1d; From 60b28d846f90a82281493313d5169810a6c7e1bb Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 6 Apr 2024 17:55:03 -0400 Subject: [PATCH 12/13] make it more clear what's actually being done --- src/GPU3D_Soft.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index ddb3f7b2..a4342431 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -193,6 +193,8 @@ private: } else { + // unoptimized algorithm is: z0 + ((z1-z0 >> 1) / xdiff * x << 1); + // or alternatively there's: z0 + (z1-z0) / (xdiff<<1) * (x<<1); return zcounter += zincr; } } From 19e8774ad06a194a1d446ad4d41b0407782327b6 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 10 May 2024 05:44:51 -0400 Subject: [PATCH 13/13] fix crash under freak circumstances --- src/GPU3D_Soft.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index a4342431..4783dff3 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -145,7 +145,7 @@ private: constexpr s32 Interpolate(s32 y0, s32 y1) const { - if (x == 0 || y0 == y1) return y0; + if (x == 0 || xdiff == 0 || y0 == y1) return y0; if (!linear) { @@ -167,7 +167,7 @@ private: constexpr s32 InterpolateZ(s32 z0, s32 z1) { - if (x == 0 || z0 == z1) return z0; + if (x == 0 || xdiff == 0 || z0 == z1) return z0; if (wbuffer) {