mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2024-11-14 05:17:40 -07:00
attempt to optimize quotient/remainder calc
This commit is contained in:
parent
ee91d7f8f3
commit
9ee9389ee1
@ -615,7 +615,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y
|
||||
|
||||
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1],
|
||||
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
|
||||
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y, polygon->WBuffer);
|
||||
}
|
||||
|
||||
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
|
||||
@ -642,7 +642,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32
|
||||
|
||||
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1],
|
||||
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
|
||||
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer);
|
||||
}
|
||||
|
||||
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
|
||||
@ -748,8 +748,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
||||
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
|
||||
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
|
||||
|
||||
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
|
||||
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);
|
||||
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
|
||||
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
|
||||
|
||||
// right vertical edges are pushed 1px to the left as long as either:
|
||||
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
|
||||
@ -834,7 +834,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
||||
int edge;
|
||||
|
||||
s32 x = xstart;
|
||||
Interpolator<0> interpX(xstart, xend+1, wl, wr);
|
||||
Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
|
||||
|
||||
if (x < 0) x = 0;
|
||||
s32 xlimit;
|
||||
@ -856,7 +856,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
||||
|
||||
interpX.SetX(x);
|
||||
|
||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
||||
s32 z = interpX.InterpolateZ(zl, zr);
|
||||
u32 dstattr = AttrBuffer[pixeladdr];
|
||||
|
||||
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
||||
@ -882,7 +882,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
||||
|
||||
interpX.SetX(x);
|
||||
|
||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
||||
s32 z = interpX.InterpolateZ(zl, zr);
|
||||
u32 dstattr = AttrBuffer[pixeladdr];
|
||||
|
||||
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
||||
@ -908,7 +908,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
||||
|
||||
interpX.SetX(x);
|
||||
|
||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
||||
s32 z = interpX.InterpolateZ(zl, zr);
|
||||
u32 dstattr = AttrBuffer[pixeladdr];
|
||||
|
||||
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
||||
@ -973,8 +973,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
||||
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
|
||||
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
|
||||
|
||||
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
|
||||
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);
|
||||
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
|
||||
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
|
||||
|
||||
// right vertical edges are pushed 1px to the left as long as either:
|
||||
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
|
||||
@ -1084,7 +1084,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
||||
int edge;
|
||||
|
||||
s32 x = xstart;
|
||||
Interpolator<0> interpX(xstart, xend+1, wl, wr);
|
||||
Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
|
||||
|
||||
if (x < 0) x = 0;
|
||||
s32 xlimit;
|
||||
@ -1123,7 +1123,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
||||
|
||||
interpX.SetX(x);
|
||||
|
||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
||||
s32 z = interpX.InterpolateZ(zl, zr);
|
||||
|
||||
// if depth test against the topmost pixel fails, test
|
||||
// against the pixel underneath
|
||||
@ -1219,7 +1219,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
||||
|
||||
interpX.SetX(x);
|
||||
|
||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
||||
s32 z = interpX.InterpolateZ(zl, zr);
|
||||
|
||||
// if depth test against the topmost pixel fails, test
|
||||
// against the pixel underneath
|
||||
@ -1311,7 +1311,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
||||
|
||||
interpX.SetX(x);
|
||||
|
||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
||||
s32 z = interpX.InterpolateZ(zl, zr);
|
||||
|
||||
// if depth test against the topmost pixel fails, test
|
||||
// against the pixel underneath
|
||||
|
@ -69,23 +69,33 @@ private:
|
||||
{
|
||||
public:
|
||||
constexpr Interpolator() {}
|
||||
constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0, s32 z1)
|
||||
{
|
||||
Setup(x0, x1, w0, w1);
|
||||
Setup(x0, x1, w0, w1, wbuffer, z0, z1);
|
||||
}
|
||||
|
||||
constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0 = 0, s32 z1 = 0)
|
||||
{
|
||||
this->x0 = x0;
|
||||
this->x1 = x1;
|
||||
this->xdiff = x1 - x0;
|
||||
this->wbuffer = wbuffer;
|
||||
|
||||
// calculate reciprocal for Z interpolation
|
||||
// TODO eventually: use a faster reciprocal function?
|
||||
if (this->xdiff != 0)
|
||||
this->xrecip_z = (1<<22) / this->xdiff;
|
||||
else
|
||||
this->xrecip_z = 0;
|
||||
// calculate quotient and remainder for Z interpolation
|
||||
if (!dir && !wbuffer && xdiff != 0)
|
||||
{
|
||||
if (z0 < z1)
|
||||
{
|
||||
// remainder is unused for this path
|
||||
this->zquo = ((z1 - z0) >> 1) / xdiff;
|
||||
}
|
||||
else
|
||||
{
|
||||
// should optimize down to one divide instruction
|
||||
this->zquo = ((z0 - z1) >> 1) / xdiff;
|
||||
this->zrem = ((z0 - z1) >> 1) % xdiff;
|
||||
}
|
||||
}
|
||||
|
||||
// linear mode is used if both W values are equal and have
|
||||
// low-order bits cleared (0-6 along X, 1-6 along Y)
|
||||
@ -164,7 +174,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const
|
||||
constexpr s32 InterpolateZ(s32 z0, s32 z1) const
|
||||
{
|
||||
if (xdiff == 0 || z0 == z1) return z0;
|
||||
|
||||
@ -194,9 +204,9 @@ private:
|
||||
{
|
||||
// these algorithms are weiiird but i can't argue with the results
|
||||
if (z0 < z1)
|
||||
return z0 + ((z1-z0 >> 1) / xdiff * x << 1);
|
||||
return z0 + ((zquo * x) << 1);
|
||||
else
|
||||
return z1 + (((z0-z1 >> 1) / xdiff * (xdiff-x)) + ((z0-z1 >> 1) % xdiff) << 1);
|
||||
return z1 + ((zquo * (xdiff-x) + zrem) << 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -206,8 +216,9 @@ private:
|
||||
|
||||
int shift;
|
||||
bool linear;
|
||||
bool wbuffer;
|
||||
|
||||
s32 xrecip_z;
|
||||
s32 zquo, zrem;
|
||||
s32 w0n, w0d, w1d;
|
||||
|
||||
u32 yfactor;
|
||||
@ -231,7 +242,7 @@ private:
|
||||
Increment = 0;
|
||||
XMajor = false;
|
||||
|
||||
Interp.Setup(0, 0, 0, 0);
|
||||
Interp.Setup(0, 0, 0, 0, false);
|
||||
Interp.SetX(0);
|
||||
|
||||
xcov_incr = 0;
|
||||
@ -239,7 +250,7 @@ private:
|
||||
return x0;
|
||||
}
|
||||
|
||||
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
|
||||
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y, bool wbuffer)
|
||||
{
|
||||
this->x0 = x0;
|
||||
this->y = y;
|
||||
@ -305,7 +316,7 @@ private:
|
||||
s32 x = XVal();
|
||||
|
||||
int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
|
||||
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1);
|
||||
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1, wbuffer);
|
||||
Interp.SetX(y);
|
||||
|
||||
// used for calculating AA coverage
|
||||
|
Loading…
Reference in New Issue
Block a user