implement first draft of improved timing structure

This commit is contained in:
Jaklyy 2023-12-12 00:01:26 -05:00
parent 0d6a8e0fb9
commit 24eecec50f
3 changed files with 86 additions and 97 deletions

View File

@ -331,15 +331,24 @@ public:
u32 ScrolledLine[256]; u32 ScrolledLine[256];
}; };
// numbers based on 339 poly 64-172 horiz. line poly // rasterization timing constants
static constexpr int RasterFrac = 481; // add a fractional component if pixels is not enough precision static constexpr int TimingFrac = 1; // add a fractional component if pixels is not enough precision
static constexpr int RasterTimingCap = 51116*RasterFrac;
static constexpr int PerScanlineTiming = 1064*RasterFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED static constexpr int GPU2DSpeedWithinPair = 296 * TimingFrac;
static constexpr int PerScanlineRecup = 2112*RasterFrac; // seems to check out? static constexpr int GPU2DSpeedOutsidePair = 948 * TimingFrac;
static constexpr int PerRightSlope = 1*RasterFrac; static constexpr int ScanlinePairLength = 2130 * TimingFrac;
static constexpr int PerPolyTiming = 12*RasterFrac; // should be correct for *most* line polygons and polygons with vertical slopes static constexpr int ScanlineTimeout = 2126 * TimingFrac;
static constexpr int PerPixelTiming = 1*RasterFrac; // does not apply to the first 4 pixels in a polygon (per scanline?) static constexpr int InitGPU2DTimeout = 51618 * TimingFrac;
static constexpr int EmptyPolyScanline = 4*RasterFrac - 14; // seems to be slightly under 4? static constexpr int ScanlineBreak = 4 * TimingFrac;
static constexpr int PerPolyTiming = 12 * TimingFrac; // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPixelTiming = 1 * TimingFrac; // does not apply to the first 4 pixels in a polygon (per scanline?)
// static constexpr int RasterTimingCap = 51116 * TimingFrac;
static constexpr int PerScanlineTiming = 1064 * TimingFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
static constexpr int PerScanlineRecup = 2112 * TimingFrac; // seems to check out? // should be the "free" time the gpu has to do the calculation
static constexpr int PerRightSlope = 1 * TimingFrac;
static constexpr int EmptyPolyScanline = 4 * TimingFrac - 14; // seems to be slightly under 4?
//static constexpr int FirstPixelTiming; //static constexpr int FirstPixelTiming;
class Renderer3D class Renderer3D

View File

@ -114,16 +114,14 @@ void SoftRenderer::SetThreaded(bool threaded) noexcept
bool SoftRenderer::DoTimings(s32 cycles, bool odd) bool SoftRenderer::DoTimings(s32 cycles, bool odd)
{ {
if (odd) // add timings to a counter and check if underflowed.
{
RasterTimingCounterOdd += cycles; s32* counter;
if ((RasterTimingCounterOdd + RasterTimingCounterPrev) < RasterTimingCap) return false; if (odd) counter = &RasterTimingOdd;
} else counter = &RasterTimingEven;
else
{ *counter += cycles;
RasterTimingCounterEven += cycles; if (RasterTiming - *counter) return false;
if ((RasterTimingCounterEven + RasterTimingCounterPrev) < RasterTimingCap) return false;
}
GPU.GPU3D.DispCnt |= (1<<12); GPU.GPU3D.DispCnt |= (1<<12);
return true; return true;
@ -131,28 +129,22 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd)
u32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd) u32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd)
{ {
// return the difference between the old span and the new span // calculate and return the difference between the old span and the new span, while adding timings to the timings counter
// pixels don't count towards timings if they're the first 4 pixels in a scanline (for some reason?)
if (pixels <= 4) return 0; if (pixels <= 4) return 0;
u32 pixelsremain = pixels-4; u32 pixelsremain = pixels-4;
u32 timinglimit = RasterTimingCap - RasterTimingCounterPrev;
s32* counter;
if (odd) counter = &RasterTimingOdd;
else counter = &RasterTimingEven;
//todo: do this without a for loop somehow. //todo: do this without a for loop somehow.
if (odd) for (; pixelsremain > 0; pixelsremain--)
{ {
for (; pixelsremain > 0; pixelsremain--) *counter += TimingFrac;
{ if (!(RasterTiming - *counter)) break;
RasterTimingCounterOdd += RasterFrac;
if (RasterTimingCounterOdd >= timinglimit) break;
}
}
else
{
for (; pixelsremain > 0; pixelsremain--)
{
RasterTimingCounterEven += RasterFrac;
if (RasterTimingCounterEven >= timinglimit) break;
}
} }
if (pixelsremain <= 0) return 0; if (pixelsremain <= 0) return 0;
@ -165,17 +157,7 @@ void SoftRenderer::EndScanline(bool odd)
{ {
if (!odd) if (!odd)
{ {
RasterTimingCounterPrev += std::max(RasterTimingCounterOdd, RasterTimingCounterEven); RasterTiming += std::max(RasterTimingOdd, RasterTimingEven);
RasterTimingCounterPrev -= PerScanlineRecup; // wip
if (RasterTimingCounterPrev < 0) RasterTimingCounterPrev = 0;
// calc is wrong, seems to round up...?
GPU.GPU3D.RDLines = (RasterTimingCap - RasterTimingCounterPrev) / PerScanlineTiming;
// seems to display the lowest scanline buffer count reached during the current frame.
// we also cap it to 46 here, because this reg does that too for some reason.
if (GPU.GPU3D.RDLines > GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLines = GPU.GPU3D.RDLinesMin;
else if (GPU.GPU3D.RDLines < GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLinesMin = GPU.GPU3D.RDLines;
RasterTimingCounterOdd = 0;
RasterTimingCounterEven = 0;
} }
} }
@ -1176,8 +1158,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
if (!l_filledge) x = xlimit; if (!l_filledge) x = xlimit;
else for (; x < xlimit; x++) else for (; x < xlimit; x++)
{ {
u32 pixeladdr = (y * ScanlineWidth) + x;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows // check stencil buffer for shadows
@ -1271,8 +1252,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
if (wireframe && !edge) x = std::max(x, xlimit); if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++) else for (; x < xlimit; x++)
{ {
u32 pixeladdr = (y * ScanlineWidth) + x;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows // check stencil buffer for shadows
@ -1363,8 +1343,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
if (r_filledge) if (r_filledge)
for (; x < xlimit; x++) for (; x < xlimit; x++)
{ {
u32 pixeladdr = (y * ScanlineWidth) + x;
u32 pixeladdr = (BufferOffset * ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows // check stencil buffer for shadows
@ -1479,7 +1458,6 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
} }
} }
BufferOffset = (BufferOffset + 1) & 0x7; // loop if == 8
EndScanline(odd); EndScanline(odd);
return abort; return abort;
} }
@ -1523,18 +1501,11 @@ u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr)
return density; return density;
} }
void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool late, bool odd, bool finish) void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool odd)
{ {
// to consider: // to consider:
// clearing all polygon fog flags if the master flag isn't set? // clearing all polygon fog flags if the master flag isn't set?
// merging all final pass loops into one? // merging all final pass loops into one?
u8 tempoffset;
if (finish)
tempoffset = (BufferOffset - 2 + (!odd));
else
tempoffset = (BufferOffset - 4 + (!odd));
if (tempoffset > 7) tempoffset -= 0xF8; // handle underflows
/*if (GPU.GPU3D.RenderDispCnt & (1<<5)) /*if (GPU.GPU3D.RenderDispCnt & (1<<5))
{ {
@ -1591,7 +1562,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool late, bool o
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u32 pixeladdr = (tempoffset * ScanlineWidth) + x; u32 pixeladdr = (y * ScanlineWidth) + x;
u32 density, srccolor, srcR, srcG, srcB, srcA; u32 density, srccolor, srcR, srcG, srcB, srcA;
u32 attr = AttrBuffer[pixeladdr]; u32 attr = AttrBuffer[pixeladdr];
@ -1656,7 +1627,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool late, bool o
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u32 pixeladdr = (tempoffset * ScanlineWidth) + x; u32 pixeladdr = (y * ScanlineWidth) + x;
u32 attr = AttrBuffer[pixeladdr]; u32 attr = AttrBuffer[pixeladdr];
if (!(attr & 0xF)) continue; if (!(attr & 0xF)) continue;
@ -1699,19 +1670,20 @@ void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool late, bool o
} }
} }
if (late) // if the first two scanlines are late then it's delayed by 48 scanlines
if (false)//late)
{ {
memcpy(&FinalBuffer[y*ScanlineWidth], &RDBuffer[rdbufferoffset*ScanlineWidth], 4 * ScanlineWidth); memcpy(&FinalBuffer[y*ScanlineWidth], &RDBuffer[rdbufferoffset*ScanlineWidth], 4 * ScanlineWidth);
memcpy(&RDBuffer[rdbufferoffset*ScanlineWidth], &ColorBuffer[tempoffset*ScanlineWidth], 4 * ScanlineWidth); memcpy(&RDBuffer[rdbufferoffset*ScanlineWidth], &ColorBuffer[y*ScanlineWidth], 4 * ScanlineWidth);
} }
else else
{ {
memcpy(&RDBuffer[rdbufferoffset*ScanlineWidth], &ColorBuffer[tempoffset*ScanlineWidth], 4 * ScanlineWidth); memcpy(&RDBuffer[rdbufferoffset*ScanlineWidth], &ColorBuffer[y*ScanlineWidth], 4 * ScanlineWidth);
memcpy(&FinalBuffer[y*ScanlineWidth], &RDBuffer[rdbufferoffset*ScanlineWidth], 4 * ScanlineWidth); memcpy(&FinalBuffer[y*ScanlineWidth], &RDBuffer[rdbufferoffset*ScanlineWidth], 4 * ScanlineWidth);
} }
} }
void SoftRenderer::ClearBuffers(s32 y) void SoftRenderer::ClearBuffers()
{ {
u32 clearz = ((GPU.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; u32 clearz = ((GPU.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
u32 polyid = GPU.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID u32 polyid = GPU.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID
@ -1721,9 +1693,9 @@ void SoftRenderer::ClearBuffers(s32 y)
if (GPU.GPU3D.RenderDispCnt & (1<<14)) if (GPU.GPU3D.RenderDispCnt & (1<<14))
{ {
u8 xoff = (GPU.GPU3D.RenderClearAttr2 >> 16) & 0xFF; u8 xoff = (GPU.GPU3D.RenderClearAttr2 >> 16) & 0xFF;
u8 yoff = ((GPU.GPU3D.RenderClearAttr2 >> 24) & 0xFF) + y; u8 yoff = ((GPU.GPU3D.RenderClearAttr2 >> 24) & 0xFF);
for (int i = 0; i < 2; i++) for (int y = 0; y < 192; y++)
{ {
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
@ -1739,7 +1711,7 @@ void SoftRenderer::ClearBuffers(s32 y)
u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF; u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF;
u32 pixeladdr = ((BufferOffset+i) * ScanlineWidth) + x; u32 pixeladdr = (y * ScanlineWidth) + x;
ColorBuffer[pixeladdr] = color; ColorBuffer[pixeladdr] = color;
DepthBuffer[pixeladdr] = z; DepthBuffer[pixeladdr] = z;
AttrBuffer[pixeladdr] = polyid | (val3 & 0x8000); AttrBuffer[pixeladdr] = polyid | (val3 & 0x8000);
@ -1761,11 +1733,11 @@ void SoftRenderer::ClearBuffers(s32 y)
polyid |= (GPU.GPU3D.RenderClearAttr1 & 0x8000); polyid |= (GPU.GPU3D.RenderClearAttr1 & 0x8000);
for (int i = 0; i < 2; i++) for (int y = 0; y < 192; y++)
{ {
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u32 pixeladdr = ((BufferOffset+i) * ScanlineWidth) + x; u32 pixeladdr = (y * ScanlineWidth) + x;
ColorBuffer[pixeladdr] = color; ColorBuffer[pixeladdr] = color;
DepthBuffer[pixeladdr] = clearz; DepthBuffer[pixeladdr] = clearz;
AttrBuffer[pixeladdr] = polyid; AttrBuffer[pixeladdr] = polyid;
@ -1785,24 +1757,36 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
s32 y = 0; s32 y = 0;
s8 prevbufferline = -2; s8 prevbufferline = -2;
bool latebuffer[192] = {};
u8 buffersize = 0;
RasterTiming = (ScanlinePairLength * 24);
RasterTimingOdd = 0;
RasterTimingEven = 0;
ClearBuffers();
for (u8 quarter = 0; quarter < 4; quarter++) for (u8 quarter = 0; quarter < 4; quarter++)
for (u8 bufferline = 0; bufferline < 48; bufferline += 2) for (u8 bufferline = 0; bufferline < 48; bufferline += 2)
{ {
ClearBuffers(y); RasterTiming += (ScanlineTimeout);
latebuffer[y] = RenderScanline(y, j, true);
latebuffer[y+1] = RenderScanline(y+1, j, false); if (buffersize >= 50) RasterTiming = (ScanlinePairLength * 23) + ScanlineTimeout;
RenderScanline(y, j, true);
RenderScanline(y+1, j, false);
RasterTiming += ScanlineBreak;
u32* RDLinesReg = &GPU.GPU3D.RDLines;
*RDLinesReg = 0;
for (int i = RasterTiming; i > ScanlinePairLength / 2; i -= ScanlinePairLength / 2) *RDLinesReg += 1;
// seems to display the lowest scanline buffer count reached during the current frame.
// we also cap it to 46 here, because this reg does that too for some reason.
if (*RDLinesReg > GPU.GPU3D.RDLinesMin) *RDLinesReg = GPU.GPU3D.RDLinesMin;
else if (*RDLinesReg < GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLinesMin = *RDLinesReg;
if (prevbufferline >= 0) if (prevbufferline >= 0)
{ {
if (!latebuffer[y-2]) latebuffer[y] = false; ScanlineFinalPass(y-2, prevbufferline, true);
ScanlineFinalPass(y-1, prevbufferline+1, false);
if (!latebuffer[y-1]) latebuffer[y+1] = false;
ScanlineFinalPass(y-2, prevbufferline, latebuffer[y-2], true, false);
ScanlineFinalPass(y-1, prevbufferline+1, latebuffer[y-1], false, false);
} }
y += 2; y += 2;
@ -1812,8 +1796,8 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
Platform::Semaphore_Post(Sema_ScanlineCount); Platform::Semaphore_Post(Sema_ScanlineCount);
} }
ScanlineFinalPass(190, prevbufferline, latebuffer[190], true, true); ScanlineFinalPass(190, prevbufferline, true);
ScanlineFinalPass(191, prevbufferline+1, latebuffer[191], false, true); ScanlineFinalPass(191, prevbufferline+1, false);
if (threaded) if (threaded)
Platform::Semaphore_Post(Sema_ScanlineCount); Platform::Semaphore_Post(Sema_ScanlineCount);
@ -1823,10 +1807,6 @@ void SoftRenderer::VCount144()
{ {
if (RenderThreadRunning.load(std::memory_order_relaxed) && !GPU.GPU3D.AbortFrame) if (RenderThreadRunning.load(std::memory_order_relaxed) && !GPU.GPU3D.AbortFrame)
Platform::Semaphore_Wait(Sema_RenderDone); Platform::Semaphore_Wait(Sema_RenderDone);
RasterTimingCounterPrev = 0;
RasterTimingCounterOdd = 0;
RasterTimingCounterEven = 0;
} }
void SoftRenderer::RenderFrame() void SoftRenderer::RenderFrame()

View File

@ -469,16 +469,17 @@ private:
bool RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd); bool RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd);
bool RenderScanline(s32 y, int npolys, bool odd); bool RenderScanline(s32 y, int npolys, bool odd);
u32 CalculateFogDensity(u32 pixeladdr); u32 CalculateFogDensity(u32 pixeladdr);
void ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool late, bool odd, bool finish); void ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool odd);
void ClearBuffers(s32 y); void ClearBuffers();
void RenderPolygons(bool threaded, Polygon** polygons, int npolys); void RenderPolygons(bool threaded, Polygon** polygons, int npolys);
void RenderThreadFunc(); void RenderThreadFunc();
// counters for scanline rasterization timings // counters for scanline rasterization timings
s32 RasterTimingCounterPrev = 0; s32 RasterTiming = 0;
s32 RasterTimingCounterOdd = 0; //s32 RasterTimingCounterPrev = 0;
s32 RasterTimingCounterEven = 0; s32 RasterTimingOdd = 0;
s32 RasterTimingEven = 0;
// buffer dimensions are 258x194 to add an offscreen 1px border // buffer dimensions are 258x194 to add an offscreen 1px border
// which simplifies edge marking tests // which simplifies edge marking tests
@ -489,7 +490,7 @@ private:
static constexpr int ScanlineWidth = 256; static constexpr int ScanlineWidth = 256;
static constexpr int NumScanlines = 192; static constexpr int NumScanlines = 192;
static constexpr int NumScanlinesRD = 48; static constexpr int NumScanlinesRD = 48;
static constexpr int NumScanlinesInternal = 8; static constexpr int NumScanlinesInternal = 192;
static constexpr int InternalBufferSize = ScanlineWidth * NumScanlinesInternal; static constexpr int InternalBufferSize = ScanlineWidth * NumScanlinesInternal;
static constexpr int RDBufferSize = ScanlineWidth * NumScanlinesRD; static constexpr int RDBufferSize = ScanlineWidth * NumScanlinesRD;
static constexpr int BufferSize = ScanlineWidth * NumScanlines; static constexpr int BufferSize = ScanlineWidth * NumScanlines;
@ -498,7 +499,6 @@ private:
u32 ColorBuffer[InternalBufferSize * 2]; u32 ColorBuffer[InternalBufferSize * 2];
u32 DepthBuffer[InternalBufferSize * 2]; u32 DepthBuffer[InternalBufferSize * 2];
u32 AttrBuffer[InternalBufferSize * 2]; u32 AttrBuffer[InternalBufferSize * 2];
u8 BufferOffset;
u32 RDBuffer[RDBufferSize]; u32 RDBuffer[RDBufferSize];
u32 FinalBuffer[BufferSize]; u32 FinalBuffer[BufferSize];