optimize per pixel timing counting

This commit is contained in:
Jaklyy 2023-12-10 18:41:17 -05:00
parent 785fab024f
commit 2bf033e0bc
3 changed files with 67 additions and 40 deletions

View File

@ -332,14 +332,14 @@ public:
};
// numbers based on 339 poly 64-172 horiz. line poly
static constexpr int Frac = 481; // add a fractional component if pixels is not enough precision
static constexpr int RasterTimingCap = 51116*Frac;
static constexpr int PerScanlineTiming = 1064*Frac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
static constexpr int PerScanlineRecup = 2112*Frac; // seems to check out?
static constexpr int PerRightSlope = 1*Frac;
static constexpr int PerPolyTiming = 12*Frac; // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPixelTiming = 1*Frac; // does not apply to the first 4 pixels in a polygon (per scanline?)
static constexpr int EmptyPolyScanline = 4*Frac - 14; // seems to be slightly under 4?
// Rasterizer timing constants.
// Every value below is pre-multiplied by RasterFrac, so amounts that are not a whole
// number of base units (e.g. EmptyPolyScanline = 4*RasterFrac - 14) can still be held in an int.
// NOTE(review): the base unit appears to be "one pixel" going by the comments below — confirm.
static constexpr int RasterFrac = 481; // add a fractional component if pixels is not enough precision
// Budget that DoTimingsPixels subtracts the accumulated scanline counters from.
static constexpr int RasterTimingCap = 51116*RasterFrac;
static constexpr int PerScanlineTiming = 1064*RasterFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
static constexpr int PerScanlineRecup = 2112*RasterFrac; // seems to check out?
static constexpr int PerRightSlope = 1*RasterFrac;
// Charged once per polygon per scanline via DoTimings() at the top of RenderPolygonScanline.
static constexpr int PerPolyTiming = 12*RasterFrac; // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPixelTiming = 1*RasterFrac; // does not apply to the first 4 pixels in a polygon (per scanline?)
static constexpr int EmptyPolyScanline = 4*RasterFrac - 14; // seems to be slightly under 4?
//static constexpr int FirstPixelTiming;
class Renderer3D

View File

@ -129,6 +129,29 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd)
return true;
}
// Checks whether a span of `pixels` pixels fits in the time left for the current scanline.
//
// pixels: length of the span about to be rendered (the caller passes xend - x).
// odd:    selects RasterTimingCounterOdd (true) or RasterTimingCounterEven (false).
//
// Returns 0 when the span is accepted unchanged; otherwise the number of pixels the caller
// should cut from the right of the span (RenderPolygonScanline subtracts the result from
// xend and r_edgelen and sets abortscanline).
//
// This function only reads the timing counters; it never adds the span's cost to them.
// NOTE(review): confirm that is intended — nothing visible here charges per-pixel time any more.
s32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd)
{
// return the difference between the old span and the new span
// the first 4 pixels of a span are not charged, so spans that short always fit
if (pixels <= 4) return 0;
// cost of the chargeable part of the span, in RasterFrac-scaled units
u32 pixeltiming = (pixels - 4) * RasterFrac;
if (odd)
{
// budget left after what has already been counted for this line plus the carried-over "Prev" amount
u32 rasterend = RasterTimingCap - (RasterTimingCounterOdd + RasterTimingCounterPrev);
// from here on pixeltiming holds (budget left - span cost), not the span cost
pixeltiming = rasterend - pixeltiming;
}
else
{
// same computation using the even-line counter
u32 rasterend = RasterTimingCap - (RasterTimingCounterEven + RasterTimingCounterPrev);
pixeltiming = rasterend - pixeltiming;
}
// NOTE(review): pixeltiming is declared u32. Assuming u32 is an unsigned 32-bit typedef (not
// visible here), a span that costs more than the remaining budget wraps to a large positive
// value instead of going negative, so this test is true for every result except exactly 0 and
// over-budget spans are accepted. A signed type looks like the intent — confirm before changing,
// because the return expression below would then also need re-deriving.
if (pixeltiming > 0) return 0;
// set bit 12 of DispCnt
// NOTE(review): presumably the 3D display control "RDLINES underflow" flag — confirm against hardware docs.
GPU.GPU3D.DispCnt |= (1<<12);
// round pixeltiming up to whole pixels, add back the 4 uncharged pixels, and report how much
// shorter than `pixels` that is. Under the unsigned assumption above this line is only reached
// with pixeltiming == 0, which makes the result pixels - 4.
return pixels - (((pixeltiming + (RasterFrac-1)) / RasterFrac) + 4);
}
void SoftRenderer::EndScanline(bool odd)
{
if (!odd)
@ -707,11 +730,10 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
}
}
bool SoftRenderer::Step(RendererPolygon* rp, bool abortscanline)
void SoftRenderer::Step(RendererPolygon* rp)
{
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
return abortscanline;
}
void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y)
@ -981,7 +1003,11 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
CheckSlope(rp, y);
if (DoTimings(PerPolyTiming, odd)) return Step(rp, true);
if (DoTimings(PerPolyTiming, odd))
{
Step(rp);
return true;
}
Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend;
@ -990,7 +1016,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start;
Interpolator<1>* interp_end;
u16 pixelsrendered = 0; // for tracking timings
bool abortscanline = false; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL;
xend = rp->XR;
@ -1109,18 +1135,28 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
int edge;
s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr);
xend += 1;
Interpolator<0> interpX(xstart, xend, wl, wr);
if (x < 0) x = 0;
s32 xlimit;
s32 xcov = 0;
if (xend > 256) xend = 256;
// determine if the span can be rendered within the time allotted to the scanline
s32 diff = DoTimingsPixels(xend-x, odd);
if (diff != 0)
{
xend -= diff;
r_edgelen -= diff;
abortscanline = true;
}
// part 1: left edge
edge = yedge | 0x1;
xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
if (xlimit > xend) xlimit = xend;
if (l_edgecov & (1<<31))
{
xcov = (l_edgecov >> 12) & 0x3FF;
@ -1128,12 +1164,9 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
}
for (; x < xlimit; x++)
if (!l_filledge) x = xlimit;
else for (; x < xlimit; x++)
{
if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true);
pixelsrendered++;
if (!l_filledge) continue;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr];
@ -1223,16 +1256,12 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
// part 2: polygon inside
edge = yedge;
xlimit = xend-r_edgelen+1;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend-r_edgelen;
if (xlimit > xend) xlimit = xend;
for (; x < xlimit; x++)
if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++)
{
if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true);
pixelsrendered++;
if (wireframe && !edge) continue;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr];
@ -1315,20 +1344,16 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
// part 3: right edge
edge = yedge | 0x2;
xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend;
if (r_edgecov & (1<<31))
{
xcov = (r_edgecov >> 12) & 0x3FF;
if (xcov == 0x3FF) xcov = 0;
}
//if (rp->SlopeR.Increment != 0 && DoTimings(PerRightSlope, odd)) return Step(rp, true); // should be fine to not immediately return? might be wrong
if (r_filledge)
for (; x < xlimit; x++)
{
if (pixelsrendered >= 4 && DoTimings(PerPixelTiming, odd)) return Step(rp, true);
pixelsrendered++;
if (!r_filledge) continue;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr];
@ -1415,7 +1440,8 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow);
}
}
return Step(rp, false);
Step(rp);
return abortscanline;
}
bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
@ -1429,7 +1455,7 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
if (abort)
{
CheckSlope(rp, y);
Step(rp, NULL);
Step(rp);
}
else if (y == polygon->YBottom && y != polygon->YTop)
{

View File

@ -455,6 +455,7 @@ private:
melonDS::GPU& GPU;
RendererPolygon PolygonList[2048];
bool DoTimings(s32 cycles, bool odd);
s32 DoTimingsPixels(u32 pixels, bool odd);
void EndScanline(bool odd);
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha);
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t);
@ -462,7 +463,7 @@ private:
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y);
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y);
void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
bool Step(RendererPolygon* rp, bool abortscanline);
void Step(RendererPolygon* rp);
void CheckSlope(RendererPolygon* rp, s32 y);
void RenderShadowMaskScanline(RendererPolygon* rp, s32 y);
bool RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd);