Attempt to optimize the quotient/remainder calculation used for Z interpolation

This commit is contained in:
Jaklyy
2024-03-17 14:00:49 -04:00
parent ee91d7f8f3
commit 9ee9389ee1
2 changed files with 41 additions and 30 deletions

View File

@ -615,7 +615,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1], polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1],
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y, polygon->WBuffer);
} }
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
@ -642,7 +642,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1], polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1],
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer);
} }
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
@ -748,8 +748,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
// right vertical edges are pushed 1px to the left as long as either: // right vertical edges are pushed 1px to the left as long as either:
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
@ -834,7 +834,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
int edge; int edge;
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr); Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
if (x < 0) x = 0; if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
@ -856,7 +856,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -882,7 +882,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -908,7 +908,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -973,8 +973,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
// right vertical edges are pushed 1px to the left as long as either: // right vertical edges are pushed 1px to the left as long as either:
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
@ -1084,7 +1084,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
int edge; int edge;
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr); Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
if (x < 0) x = 0; if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
@ -1123,7 +1123,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath
@ -1219,7 +1219,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath
@ -1311,7 +1311,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath

View File

@ -69,23 +69,33 @@ private:
{ {
public: public:
constexpr Interpolator() {} constexpr Interpolator() {}
constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0, s32 z1)
{ {
Setup(x0, x1, w0, w1); Setup(x0, x1, w0, w1, wbuffer, z0, z1);
} }
constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1) constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0 = 0, s32 z1 = 0)
{ {
this->x0 = x0; this->x0 = x0;
this->x1 = x1; this->x1 = x1;
this->xdiff = x1 - x0; this->xdiff = x1 - x0;
this->wbuffer = wbuffer;
// calculate reciprocal for Z interpolation // calculate quotient and remainder for Z interpolation
// TODO eventually: use a faster reciprocal function? if (!dir && !wbuffer && xdiff != 0)
if (this->xdiff != 0) {
this->xrecip_z = (1<<22) / this->xdiff; if (z0 < z1)
else {
this->xrecip_z = 0; // remainder is unused for this path
this->zquo = ((z1 - z0) >> 1) / xdiff;
}
else
{
// should optimize down to one divide instruction
this->zquo = ((z0 - z1) >> 1) / xdiff;
this->zrem = ((z0 - z1) >> 1) % xdiff;
}
}
// linear mode is used if both W values are equal and have // linear mode is used if both W values are equal and have
// low-order bits cleared (0-6 along X, 1-6 along Y) // low-order bits cleared (0-6 along X, 1-6 along Y)
@ -164,7 +174,7 @@ private:
} }
} }
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const constexpr s32 InterpolateZ(s32 z0, s32 z1) const
{ {
if (xdiff == 0 || z0 == z1) return z0; if (xdiff == 0 || z0 == z1) return z0;
@ -194,9 +204,9 @@ private:
{ {
// these algorithms are weiiird but i can't argue with the results // these algorithms are weiiird but i can't argue with the results
if (z0 < z1) if (z0 < z1)
return z0 + ((z1-z0 >> 1) / xdiff * x << 1); return z0 + ((zquo * x) << 1);
else else
return z1 + (((z0-z1 >> 1) / xdiff * (xdiff-x)) + ((z0-z1 >> 1) % xdiff) << 1); return z1 + ((zquo * (xdiff-x) + zrem) << 1);
} }
} }
} }
@ -206,8 +216,9 @@ private:
int shift; int shift;
bool linear; bool linear;
bool wbuffer;
s32 xrecip_z; s32 zquo, zrem;
s32 w0n, w0d, w1d; s32 w0n, w0d, w1d;
u32 yfactor; u32 yfactor;
@ -231,7 +242,7 @@ private:
Increment = 0; Increment = 0;
XMajor = false; XMajor = false;
Interp.Setup(0, 0, 0, 0); Interp.Setup(0, 0, 0, 0, false);
Interp.SetX(0); Interp.SetX(0);
xcov_incr = 0; xcov_incr = 0;
@ -239,7 +250,7 @@ private:
return x0; return x0;
} }
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y, bool wbuffer)
{ {
this->x0 = x0; this->x0 = x0;
this->y = y; this->y = y;
@ -305,7 +316,7 @@ private:
s32 x = XVal(); s32 x = XVal();
int interpoffset = (Increment >= 0x40000) && (side ^ Negative); int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1, wbuffer);
Interp.SetX(y); Interp.SetX(y);
// used for calculating AA coverage // used for calculating AA coverage