mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2025-07-27 00:00:07 -06:00
attempt to optimize quotient/remainder calc
This commit is contained in:
@ -615,7 +615,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y
|
|||||||
|
|
||||||
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
|
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
|
||||||
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1],
|
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1],
|
||||||
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
|
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y, polygon->WBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
|
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
|
||||||
@ -642,7 +642,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32
|
|||||||
|
|
||||||
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
|
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
|
||||||
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1],
|
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1],
|
||||||
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
|
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
|
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
|
||||||
@ -748,8 +748,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||||||
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
|
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
|
||||||
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
|
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
|
||||||
|
|
||||||
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
|
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
|
||||||
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);
|
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
|
||||||
|
|
||||||
// right vertical edges are pushed 1px to the left as long as either:
|
// right vertical edges are pushed 1px to the left as long as either:
|
||||||
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
|
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
|
||||||
@ -834,7 +834,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||||||
int edge;
|
int edge;
|
||||||
|
|
||||||
s32 x = xstart;
|
s32 x = xstart;
|
||||||
Interpolator<0> interpX(xstart, xend+1, wl, wr);
|
Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
|
||||||
|
|
||||||
if (x < 0) x = 0;
|
if (x < 0) x = 0;
|
||||||
s32 xlimit;
|
s32 xlimit;
|
||||||
@ -856,7 +856,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||||||
|
|
||||||
interpX.SetX(x);
|
interpX.SetX(x);
|
||||||
|
|
||||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
s32 z = interpX.InterpolateZ(zl, zr);
|
||||||
u32 dstattr = AttrBuffer[pixeladdr];
|
u32 dstattr = AttrBuffer[pixeladdr];
|
||||||
|
|
||||||
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
||||||
@ -882,7 +882,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||||||
|
|
||||||
interpX.SetX(x);
|
interpX.SetX(x);
|
||||||
|
|
||||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
s32 z = interpX.InterpolateZ(zl, zr);
|
||||||
u32 dstattr = AttrBuffer[pixeladdr];
|
u32 dstattr = AttrBuffer[pixeladdr];
|
||||||
|
|
||||||
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
||||||
@ -908,7 +908,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||||||
|
|
||||||
interpX.SetX(x);
|
interpX.SetX(x);
|
||||||
|
|
||||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
s32 z = interpX.InterpolateZ(zl, zr);
|
||||||
u32 dstattr = AttrBuffer[pixeladdr];
|
u32 dstattr = AttrBuffer[pixeladdr];
|
||||||
|
|
||||||
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
|
||||||
@ -973,8 +973,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||||||
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
|
s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
|
||||||
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
|
s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
|
||||||
|
|
||||||
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
|
s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL]);
|
||||||
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);
|
s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR]);
|
||||||
|
|
||||||
// right vertical edges are pushed 1px to the left as long as either:
|
// right vertical edges are pushed 1px to the left as long as either:
|
||||||
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
|
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
|
||||||
@ -1084,7 +1084,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||||||
int edge;
|
int edge;
|
||||||
|
|
||||||
s32 x = xstart;
|
s32 x = xstart;
|
||||||
Interpolator<0> interpX(xstart, xend+1, wl, wr);
|
Interpolator<0> interpX(xstart, xend+1, wl, wr, polygon->WBuffer, zl, zr);
|
||||||
|
|
||||||
if (x < 0) x = 0;
|
if (x < 0) x = 0;
|
||||||
s32 xlimit;
|
s32 xlimit;
|
||||||
@ -1123,7 +1123,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||||||
|
|
||||||
interpX.SetX(x);
|
interpX.SetX(x);
|
||||||
|
|
||||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
s32 z = interpX.InterpolateZ(zl, zr);
|
||||||
|
|
||||||
// if depth test against the topmost pixel fails, test
|
// if depth test against the topmost pixel fails, test
|
||||||
// against the pixel underneath
|
// against the pixel underneath
|
||||||
@ -1219,7 +1219,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||||||
|
|
||||||
interpX.SetX(x);
|
interpX.SetX(x);
|
||||||
|
|
||||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
s32 z = interpX.InterpolateZ(zl, zr);
|
||||||
|
|
||||||
// if depth test against the topmost pixel fails, test
|
// if depth test against the topmost pixel fails, test
|
||||||
// against the pixel underneath
|
// against the pixel underneath
|
||||||
@ -1311,7 +1311,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||||||
|
|
||||||
interpX.SetX(x);
|
interpX.SetX(x);
|
||||||
|
|
||||||
s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
|
s32 z = interpX.InterpolateZ(zl, zr);
|
||||||
|
|
||||||
// if depth test against the topmost pixel fails, test
|
// if depth test against the topmost pixel fails, test
|
||||||
// against the pixel underneath
|
// against the pixel underneath
|
||||||
|
@ -69,23 +69,33 @@ private:
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
constexpr Interpolator() {}
|
constexpr Interpolator() {}
|
||||||
constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
|
constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0, s32 z1)
|
||||||
{
|
{
|
||||||
Setup(x0, x1, w0, w1);
|
Setup(x0, x1, w0, w1, wbuffer, z0, z1);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
|
constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1, bool wbuffer, s32 z0 = 0, s32 z1 = 0)
|
||||||
{
|
{
|
||||||
this->x0 = x0;
|
this->x0 = x0;
|
||||||
this->x1 = x1;
|
this->x1 = x1;
|
||||||
this->xdiff = x1 - x0;
|
this->xdiff = x1 - x0;
|
||||||
|
this->wbuffer = wbuffer;
|
||||||
|
|
||||||
// calculate reciprocal for Z interpolation
|
// calculate quotient and remainder for Z interpolation
|
||||||
// TODO eventually: use a faster reciprocal function?
|
if (!dir && !wbuffer && xdiff != 0)
|
||||||
if (this->xdiff != 0)
|
{
|
||||||
this->xrecip_z = (1<<22) / this->xdiff;
|
if (z0 < z1)
|
||||||
|
{
|
||||||
|
// remainder is unused for this path
|
||||||
|
this->zquo = ((z1 - z0) >> 1) / xdiff;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
this->xrecip_z = 0;
|
{
|
||||||
|
// should optimize down to one divide instruction
|
||||||
|
this->zquo = ((z0 - z1) >> 1) / xdiff;
|
||||||
|
this->zrem = ((z0 - z1) >> 1) % xdiff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// linear mode is used if both W values are equal and have
|
// linear mode is used if both W values are equal and have
|
||||||
// low-order bits cleared (0-6 along X, 1-6 along Y)
|
// low-order bits cleared (0-6 along X, 1-6 along Y)
|
||||||
@ -164,7 +174,7 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const
|
constexpr s32 InterpolateZ(s32 z0, s32 z1) const
|
||||||
{
|
{
|
||||||
if (xdiff == 0 || z0 == z1) return z0;
|
if (xdiff == 0 || z0 == z1) return z0;
|
||||||
|
|
||||||
@ -194,9 +204,9 @@ private:
|
|||||||
{
|
{
|
||||||
// these algorithms are weiiird but i can't argue with the results
|
// these algorithms are weiiird but i can't argue with the results
|
||||||
if (z0 < z1)
|
if (z0 < z1)
|
||||||
return z0 + ((z1-z0 >> 1) / xdiff * x << 1);
|
return z0 + ((zquo * x) << 1);
|
||||||
else
|
else
|
||||||
return z1 + (((z0-z1 >> 1) / xdiff * (xdiff-x)) + ((z0-z1 >> 1) % xdiff) << 1);
|
return z1 + ((zquo * (xdiff-x) + zrem) << 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -206,8 +216,9 @@ private:
|
|||||||
|
|
||||||
int shift;
|
int shift;
|
||||||
bool linear;
|
bool linear;
|
||||||
|
bool wbuffer;
|
||||||
|
|
||||||
s32 xrecip_z;
|
s32 zquo, zrem;
|
||||||
s32 w0n, w0d, w1d;
|
s32 w0n, w0d, w1d;
|
||||||
|
|
||||||
u32 yfactor;
|
u32 yfactor;
|
||||||
@ -231,7 +242,7 @@ private:
|
|||||||
Increment = 0;
|
Increment = 0;
|
||||||
XMajor = false;
|
XMajor = false;
|
||||||
|
|
||||||
Interp.Setup(0, 0, 0, 0);
|
Interp.Setup(0, 0, 0, 0, false);
|
||||||
Interp.SetX(0);
|
Interp.SetX(0);
|
||||||
|
|
||||||
xcov_incr = 0;
|
xcov_incr = 0;
|
||||||
@ -239,7 +250,7 @@ private:
|
|||||||
return x0;
|
return x0;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
|
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y, bool wbuffer)
|
||||||
{
|
{
|
||||||
this->x0 = x0;
|
this->x0 = x0;
|
||||||
this->y = y;
|
this->y = y;
|
||||||
@ -305,7 +316,7 @@ private:
|
|||||||
s32 x = XVal();
|
s32 x = XVal();
|
||||||
|
|
||||||
int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
|
int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
|
||||||
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1);
|
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1, wbuffer);
|
||||||
Interp.SetX(y);
|
Interp.SetX(y);
|
||||||
|
|
||||||
// used for calculating AA coverage
|
// used for calculating AA coverage
|
||||||
|
Reference in New Issue
Block a user