optimize per pixel timing counting

This commit is contained in:
Jaklyy
2023-12-10 18:41:17 -05:00
parent 785fab024f
commit 2bf033e0bc
3 changed files with 67 additions and 40 deletions

View File

@ -332,14 +332,14 @@ public:
}; };
// numbers based on 339 poly 64-172 horiz. line poly // numbers based on 339 poly 64-172 horiz. line poly
static constexpr int Frac = 481; // add a fractional component if pixels is not enough precision static constexpr int RasterFrac = 481; // add a fractional component if pixels is not enough precision
static constexpr int RasterTimingCap = 51116*Frac; static constexpr int RasterTimingCap = 51116*RasterFrac;
static constexpr int PerScanlineTiming = 1064*Frac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED static constexpr int PerScanlineTiming = 1064*RasterFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
static constexpr int PerScanlineRecup = 2112*Frac; // seems to check out? static constexpr int PerScanlineRecup = 2112*RasterFrac; // seems to check out?
static constexpr int PerRightSlope = 1*Frac; static constexpr int PerRightSlope = 1*RasterFrac;
static constexpr int PerPolyTiming = 12*Frac; // should be correct for *most* line polygons and polygons with vertical slopes static constexpr int PerPolyTiming = 12*RasterFrac; // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPixelTiming = 1*Frac; // does not apply to the first 4 pixels in a polygon (per scanline?) static constexpr int PerPixelTiming = 1*RasterFrac; // does not apply to the first 4 pixels in a polygon (per scanline?)
static constexpr int EmptyPolyScanline = 4*Frac - 14; // seems to be slightly under 4? static constexpr int EmptyPolyScanline = 4*RasterFrac - 14; // seems to be slightly under 4?
//static constexpr int FirstPixelTiming; //static constexpr int FirstPixelTiming;
class Renderer3D class Renderer3D

View File

@ -129,6 +129,29 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd)
return true; return true;
} }
// Checks whether a span of `pixels` pixels still fits in the raster time that is
// left, and reports how many trailing pixels must be dropped when it does not.
//
// pixels: width of the span about to be drawn. The first 4 pixels are not charged;
//         every further pixel costs RasterFrac timing units.
// odd:    true reads RasterTimingCounterOdd, false reads RasterTimingCounterEven
//         (RasterTimingCounterPrev is added to either one).
//
// Returns 0 when the span fits (or is 4 pixels or fewer). Otherwise sets bit 12 of
// GPU.GPU3D.DispCnt and returns the number of pixels to cut from the end of the
// span, always in the range [1, pixels - 4].
//
// NOTE(review): this function only reads the timing counters; it never adds the
// span's cost to them. Confirm that the accounting happens elsewhere.
s32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd)
{
    const long long FreePixels = 4;

    // Reinterpret the width as signed: the caller passes `xend-x`, and an inverted
    // span would otherwise show up here as a huge unsigned value.
    const long long span = static_cast<s32>(pixels);
    if (span <= FreePixels) return 0;

    // All budget math is signed. With unsigned operands an over-budget result
    // wraps around to a large positive number and can never be detected.
    const long long cost = (span - FreePixels) * RasterFrac;
    const long long used = static_cast<long long>(odd ? RasterTimingCounterOdd : RasterTimingCounterEven)
                         + RasterTimingCounterPrev;
    const long long remaining = static_cast<long long>(RasterTimingCap) - used;

    // The whole span is affordable.
    if (remaining >= cost) return 0;

    GPU.GPU3D.DispCnt |= (1<<12);

    // Round the deficit up to whole pixels: a pixel that is only partly paid for
    // is not drawn. The free pixels can never be cut, hence the clamp.
    const long long deficit = cost - remaining;
    long long cut = (deficit + (RasterFrac - 1)) / RasterFrac;
    if (cut > span - FreePixels) cut = span - FreePixels;

    return static_cast<s32>(cut);
}
void SoftRenderer::EndScanline(bool odd) void SoftRenderer::EndScanline(bool odd)
{ {
if (!odd) if (!odd)
@ -707,11 +730,10 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
} }
} }
bool SoftRenderer::Step(RendererPolygon* rp, bool abortscanline) void SoftRenderer::Step(RendererPolygon* rp)
{ {
rp->XL = rp->SlopeL.Step(); rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step(); rp->XR = rp->SlopeR.Step();
return abortscanline;
} }
void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y) void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y)
@ -981,7 +1003,11 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
CheckSlope(rp, y); CheckSlope(rp, y);
if (DoTimings(PerPolyTiming, odd)) return Step(rp, true); if (DoTimings(PerPolyTiming, odd))
{
Step(rp);
return true;
}
Vertex *vlcur, *vlnext, *vrcur, *vrnext; Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend; s32 xstart, xend;
@ -990,7 +1016,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
s32 l_edgecov, r_edgecov; s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start; Interpolator<1>* interp_start;
Interpolator<1>* interp_end; Interpolator<1>* interp_end;
u16 pixelsrendered = 0; // for tracking timings bool abortscanline = false; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL; xstart = rp->XL;
xend = rp->XR; xend = rp->XR;
@ -1109,18 +1135,28 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
int edge; int edge;
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr); xend += 1;
Interpolator<0> interpX(xstart, xend, wl, wr);
if (x < 0) x = 0; if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
s32 xcov = 0; s32 xcov = 0;
if (xend > 256) xend = 256;
// determine if the span can be rendered within the time allotted to the scanline
s32 diff = DoTimingsPixels(xend-x, odd);
if (diff != 0)
{
xend -= diff;
r_edgelen -= diff;
abortscanline = true;
}
// part 1: left edge // part 1: left edge
edge = yedge | 0x1; edge = yedge | 0x1;
xlimit = xstart+l_edgelen; xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (l_edgecov & (1<<31)) if (l_edgecov & (1<<31))
{ {
xcov = (l_edgecov >> 12) & 0x3FF; xcov = (l_edgecov >> 12) & 0x3FF;
@ -1128,12 +1164,9 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
} }
for (; x < xlimit; x++) if (!l_filledge) x = xlimit;
else for (; x < xlimit; x++)
{ {
if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true);
pixelsrendered++;
if (!l_filledge) continue;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x; u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
@ -1223,16 +1256,12 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
// part 2: polygon inside // part 2: polygon inside
edge = yedge; edge = yedge;
xlimit = xend-r_edgelen+1; xlimit = xend-r_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
for (; x < xlimit; x++) if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++)
{ {
if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true);
pixelsrendered++;
if (wireframe && !edge) continue;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x; u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
@ -1315,20 +1344,16 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
// part 3: right edge // part 3: right edge
edge = yedge | 0x2; edge = yedge | 0x2;
xlimit = xend+1; xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (r_edgecov & (1<<31)) if (r_edgecov & (1<<31))
{ {
xcov = (r_edgecov >> 12) & 0x3FF; xcov = (r_edgecov >> 12) & 0x3FF;
if (xcov == 0x3FF) xcov = 0; if (xcov == 0x3FF) xcov = 0;
} }
//if (rp->SlopeR.Increment != 0 && DoTimings(PerRightSlope, odd)) return Step(rp, true); // should be fine to not immediately return? might be wrong
if (r_filledge)
for (; x < xlimit; x++) for (; x < xlimit; x++)
{ {
if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true);
pixelsrendered++;
if (!r_filledge) continue;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x; u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
@ -1415,7 +1440,8 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow);
} }
} }
return Step(rp, false); Step(rp);
return abortscanline;
} }
bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd) bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
@ -1429,7 +1455,7 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
if (abort) if (abort)
{ {
CheckSlope(rp, y); CheckSlope(rp, y);
Step(rp, NULL); Step(rp);
} }
else if (y == polygon->YBottom && y != polygon->YTop) else if (y == polygon->YBottom && y != polygon->YTop)
{ {

View File

@ -455,6 +455,7 @@ private:
melonDS::GPU& GPU; melonDS::GPU& GPU;
RendererPolygon PolygonList[2048]; RendererPolygon PolygonList[2048];
bool DoTimings(s32 cycles, bool odd); bool DoTimings(s32 cycles, bool odd);
s32 DoTimingsPixels(u32 pixels, bool odd);
void EndScanline(bool odd); void EndScanline(bool odd);
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha); void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha);
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t); u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t);
@ -462,7 +463,7 @@ private:
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y); void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y);
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y); void SetupPolygonRightEdge(RendererPolygon* rp, s32 y);
void SetupPolygon(RendererPolygon* rp, Polygon* polygon); void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
bool Step(RendererPolygon* rp, bool abortscanline); void Step(RendererPolygon* rp);
void CheckSlope(RendererPolygon* rp, s32 y); void CheckSlope(RendererPolygon* rp, s32 y);
void RenderShadowMaskScanline(RendererPolygon* rp, s32 y); void RenderShadowMaskScanline(RendererPolygon* rp, s32 y);
bool RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd); bool RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd);