diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index a9d0bd64..c9c0dfbc 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -622,7 +622,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1], - polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); + polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y, polygon->WBuffer); } void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const @@ -649,7 +649,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1], - polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); + polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer); } void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const @@ -755,8 +755,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -841,7 +841,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* int edge; s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr); if (x < 0) x = 0; s32 xlimit; @@ -863,7 +863,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -889,7 +889,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -915,7 +915,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -980,8 +980,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -1091,7 +1091,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 int edge; s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr); if (x < 0) x = 0; s32 xlimit; @@ -1130,7 +1130,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1226,7 +1226,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1318,7 +1318,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr); // if depth test against the topmost pixel fails, test // against the pixel underneath diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 73d02e4f..daad79cd 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -69,23 +69,24 @@ private: { public: constexpr Interpolator() {} - constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) + constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0, s32 z1) { - Setup(x0, x1, w0, w1); + Setup(x0, x1, w0, w1, wbuffer, z0, z1); } - constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1) + constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0 = 0, s32 z1 = 0) { this->x0 = x0; this->x1 = x1; this->xdiff = x1 - x0; + this->wbuffer = wbuffer; - // calculate reciprocal for Z interpolation - // TODO eventually: use a faster reciprocal function? - if (this->xdiff != 0) - this->xrecip_z = (1<<22) / this->xdiff; - else - this->xrecip_z = 0; + // calculate increment and init counter for Z interpolation + if (!dir && !wbuffer && xdiff != 0) + { + this->zincr = ((z1 - z0) >> 1) / xdiff << 1; + this->zcounter = z0; + } // linear mode is used if both W values are equal and have // low-order bits cleared (0-6 along X, 1-6 along Y) @@ -144,7 +145,7 @@ private: constexpr s32 Interpolate(s32 y0, s32 y1) const { - if (xdiff == 0 || y0 == y1) return y0; + if (x == 0 || xdiff == 0 || y0 == y1) return y0; if (!linear) { @@ -164,9 +165,9 @@ private: } } - constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const + constexpr s32 InterpolateZ(s32 z0, s32 z1) { - if (xdiff == 0 || z0 == z1) return z0; + if (x == 0 || xdiff == 0 || z0 == z1) return z0; if (wbuffer) { @@ -179,37 +180,22 @@ private: else { // Z-buffering: linear interpolation - // still doesn't quite match hardware... - s32 base = 0, disp = 0, factor = 0; - - if (z0 < z1) - { - base = z0; - disp = z1 - z0; - factor = x; - } - else - { - base = z1; - disp = z0 - z1, - factor = xdiff - x; - } + // not perfect, but close if (dir) { - int shift = 0; - while (disp > 0x3FF) - { - disp >>= 1; - shift++; - } - - return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift); + // interpolating along y uses a different algorithm than x + // this algo probably isn't quite right though... + if (z0 < z1) + return z0 + (s64)(z1-z0) * x / xdiff; + else + return z1 + (s64)(z0-z1) * (xdiff-x) / xdiff; } else { - disp >>= 9; - return base + (((s64)disp * factor * xrecip_z) >> 13); + // unoptimized algorithm is: z0 + ((z1-z0 >> 1) / xdiff * x << 1); + // or alternatively there's: z0 + (z1-z0) / (xdiff<<1) * (x<<1); + return zcounter += zincr; } } } @@ -219,8 +205,10 @@ private: int shift; bool linear; + bool wbuffer; - s32 xrecip_z; + s32 zincr; + s32 zcounter; s32 w0n, w0d, w1d; u32 yfactor; @@ -244,7 +232,7 @@ private: Increment = 0; XMajor = false; - Interp.Setup(0, 0, 0, 0); + Interp.Setup(0, 0, 0, 0, false); Interp.SetX(0); xcov_incr = 0; @@ -252,7 +240,7 @@ private: return x0; } - constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) + constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y, bool wbuffer) { this->x0 = x0; this->y = y; @@ -318,7 +306,7 @@ private: s32 x = XVal(); int interpoffset = (Increment >= 0x40000) && (side ^ Negative); - Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); + Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1, wbuffer); Interp.SetX(y); // used for calculating AA coverage