This commit is contained in:
Jaklyy 2023-12-19 22:52:54 -05:00
parent 2217a34d39
commit 1054011c90
4 changed files with 170 additions and 100 deletions

View File

@ -222,7 +222,7 @@ void GPU3D::Reset() noexcept
AlphaRefVal = 0;
AlphaRef = 0;
RDLinesDisplay = 46;
RDLines = 46;
memset(ToonTable, 0, sizeof(ToonTable));
memset(EdgeTable, 0, sizeof(EdgeTable));
@ -2369,7 +2369,7 @@ void GPU3D::CheckFIFODMA() noexcept
// Handler for the VCount144 event: re-arms the rendered-lines bookkeeping to 46
// and then forwards the event to the currently selected 3D renderer.
// NOTE(review): this listing appears to interleave the pre- and post-change
// lines of a diff; RDLinesDisplay looks like the former name of RDLines.
// Confirm which of the two assignments below is meant to remain.
void GPU3D::VCount144() noexcept
{
RDLinesDisplay = 46;
// 46 is also the starting value used in Reset(); presumably the register's maximum — TODO confirm.
RDLines = 46;
CurrentRenderer->VCount144();
}
@ -2613,7 +2613,7 @@ u16 GPU3D::Read16(u32 addr) noexcept
return DispCnt;
case 0x04000320:
return RDLinesDisplay; // IT IS TIME
return RDLines; // IT IS TIME
case 0x04000600:
{
@ -2657,7 +2657,7 @@ u32 GPU3D::Read32(u32 addr) noexcept
return DispCnt;
case 0x04000320:
return RDLinesDisplay; // IT IS TIME
return RDLines; // IT IS TIME
case 0x04000600:
{

View File

@ -245,7 +245,7 @@ public:
bool RenderingEnabled = false;
u32 DispCnt = 0;
u32 RDLinesDisplay = 0;
u32 RDLines = 0;
u8 AlphaRefVal = 0;
u8 AlphaRef = 0;
@ -334,34 +334,47 @@ public:
static constexpr int TimingFrac = 1; // add a fractional component if pixels is not enough precision
// GPU 2D read timings, for emulating race conditions
static constexpr int GPU2DSpeedWithinPair = 296 * TimingFrac;
static constexpr int GPU2DSpeedOutsidePair = 810 * TimingFrac;
static constexpr int GPU2DSpeedReadScanline = 256 * TimingFrac;
static constexpr int InitGPU2DTimeout = 51618 * TimingFrac;
static constexpr int GPU2DSpeedWithinPair = 296 * TimingFrac; // the delay between finishing reading the first scanline and beginning reading the second scanline of a scanline pair.
static constexpr int GPU2DSpeedOutsidePair = 810 * TimingFrac; // the delay between finishing reading a pair and beginning reading a new pair.
static constexpr int GPU2DReadScanline = 256 * TimingFrac; // the time it takes to read a scanline.
static constexpr int GPU2DReadSLPair = 1618 * TimingFrac; // notably the same as the scanline increment.
static constexpr int InitGPU2DTimeout = 50000 * TimingFrac; // 51618? | when it starts reading the first scanline.
static constexpr int GPU2D48Scanlines = GPU2DReadSLPair * 48 * TimingFrac; // time to read 48 scanlines.
// GPU 3D rasterization timings, for emulating the timeout
static constexpr int ScanlinePairLength = 2130 * TimingFrac;
//static constexpr int ScanlinePairLength = 2130 * TimingFrac;
//static constexpr int ScanlineTimeout = 1686 * TimingFrac; // 2126? 1686?
//static constexpr int ScanlineBreak = 4 * TimingFrac;
//static constexpr int ScanlineBreak2 = 40 * TimingFrac;
static constexpr int ScanlineIncrement = 1618 * TimingFrac; // how much to increment per scanline pair
static constexpr int AbortIncrement = 12 * TimingFrac; // how much extra to increment after an aborted scanline (total 1630)
static constexpr int FreeTiming = 496 * TimingFrac; // every scanline has a free 496 pixels worth of timing for some reason.
static constexpr int InitialTiming = 48688 * TimingFrac; // add 1618*2 to get the timeout of the second scanline pair
static constexpr int Post50Max = 51116 * TimingFrac; // for some reason it doesn't care about how full it actually is, it just cares about if its the first 50 scanlines to speedrun rendering?
//static constexpr int FakeTiming = 2 * TimingFrac;
//static constexpr int FraudulentTiming = 1120 * TimingFrac; // bad theory. todo: find a better one.
static constexpr int InitialTiming = 48688 * TimingFrac; // 48688 | add 1618*2 to get the timeout of the second scanline pair
static constexpr int Post50Max = 51116 * TimingFrac; // 51116 | for some reason it doesn't care about how full it actually is, it just cares about if its the first 50 scanlines to speedrun rendering?
static constexpr int FreeTiming = 496 * TimingFrac; // 496 | every scanline has a free 496 pixels worth of timing for some reason.
static constexpr int ScanlineIncrement = 1618 * TimingFrac; // 1618 | how much to regain per scanline pair
static constexpr int AbortIncrement = 12 * TimingFrac; // 12 | how much extra to regain after an aborted scanline (total 1630)
// GPU 3D rasterization timings II, for counting each element with timing characteristics
static constexpr int FirstPolyScanline = 0 * TimingFrac;
static constexpr int PerPolyScanline = 12 * TimingFrac; // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPixelTiming = 1 * TimingFrac; // 1 pixel = 1 pixel
static constexpr int NumFreePixels = 4; // First 4 pixels in a polygon scanline are free (for some reason)
//static constexpr int FirstPolyScanline = 0 * TimingFrac;
static constexpr int PerPolyScanline = 12 * TimingFrac; // 12 | should be 12, but 14 is "correct" // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPixelTiming = 1 * TimingFrac; // 1 | 1 pixel = 1 pixel
static constexpr int NumFreePixels = 4; // 4 | First 4 pixels in a polygon scanline are free (for some reason)
static constexpr int MinToStartPoly = 2 * TimingFrac; // 1 | if there is not 1 cycle remaining, do not bother rendering polygon (CHECKME: I dont think this should decrement timings by anything?)
static constexpr int EmptyPolyScanline = 4 * TimingFrac; // - 14; // 4 | seems to be slightly under 4 px?
// GPU 3D rasterization timing III, for first polygon exclusive timing characteristics
// should be done first, as these are "async" pre-calcs of polygon attributes
static constexpr int FirstVSlope = 0 * TimingFrac; // 1 | the first polygon in a scanline having two vertical slopes adds 1 to timings...?
static constexpr int FirstNull = 1 * TimingFrac; // 1 | if the first polygon is "null" (probably wrong?)
// static constexpr int RasterTimingCap = 51116 * TimingFrac;
static constexpr int PerScanlineTiming = 1064 * TimingFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
static constexpr int PerScanlineRecup = 2112 * TimingFrac; // seems to check out? // should be the "free" time the gpu has to do the calculation
static constexpr int PerRightSlope = 1 * TimingFrac;
static constexpr int EmptyPolyScanline = 4 * TimingFrac;// - 14; // seems to be slightly under 4?
//static constexpr int FirstPixelTiming;
// static constexpr int PerScanlineTiming = 1064 * TimingFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
// static constexpr int PerScanlineRecup = 2112 * TimingFrac; // seems to check out? // should be the "free" time the gpu has to do the calculation
// static constexpr int PerRightSlope = 1 * TimingFrac;
// static constexpr int FirstPixelTiming;
class Renderer3D
{

View File

@ -127,20 +127,32 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd)
return true;
}
bool SoftRenderer::CheckTimings(s32 cycles, bool odd)
{
    // Read-only query: reports whether at least 'cycles' cycles are still
    // available for the selected scanline of the pair. Nothing is consumed.
    //   cycles - the amount of timing budget the caller wants to have left
    //   odd    - true selects the odd-scanline counter, false the even one
    const s32 spent = odd ? RasterTimingOdd : RasterTimingEven;

    return (RasterTiming - spent) >= cycles;
}
u32 SoftRenderer::DoTimingsPixels(s32 pixels, bool odd)
{
// calculate and return the difference between the old span and the new span, while adding timings to the timings counter
// pixels dont count towards timings if they're the first 4 pixels in a scanline (for some reason?)
if (pixels <= 4) return 0;
if (pixels <= NumFreePixels) return 0;
pixels -= 4;
pixels -= NumFreePixels;
s32* counter;
if (odd) counter = &RasterTimingOdd;
else counter = &RasterTimingEven;
//todo: figure out a faster way to support TimingFrac > 1 without using a for loop somehow.
//todo: figure out a faster way to support TimingFrac > 1 without using a for loop somehow. (fingers crossed we dont have to!)
if constexpr (TimingFrac > 1)
for (; pixels > 0; pixels--)
{
@ -160,6 +172,26 @@ u32 SoftRenderer::DoTimingsPixels(s32 pixels, bool odd)
return pixels;
}
bool SoftRenderer::DoTimingsSlopes(RendererPolygon* rp, s32 y, bool odd)
{
    // Charges the timing cost attributed to the slopes of the first polygon
    // on scanline 'y'. Returns false without charging anything for polygons
    // whose top and bottom coincide, and on the polygon's top scanline;
    // otherwise returns whatever DoTimings() reports for the final charge.
    Polygon* poly = rp->PolyData;

    if (poly->YTop == poly->YBottom || y == poly->YTop)
        return false;

    // pick the per-scanline counter that belongs to this half of the pair
    s32& spent = odd ? RasterTimingOdd : RasterTimingEven;

    // an edge costs one extra unit when 'y' has reached its next vertex and
    // the edge is not already sitting on the polygon's bottom vertex
    const bool leftAdvances  = (y >= poly->Vertices[rp->NextVL]->FinalPosition[1])
                            && (rp->CurVL != poly->VBottom);
    const bool rightAdvances = (y >= poly->Vertices[rp->NextVR]->FinalPosition[1])
                            && (rp->CurVR != poly->VBottom);

    if (leftAdvances)  spent += 1;
    if (rightAdvances) spent += 1;

    // CHECKME: should this charge be applied once per increment above rather than once overall?
    return DoTimings(2, odd);
}
void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{
u32 vramaddr = (texparam & 0xFFFF) << 3;
@ -744,7 +776,7 @@ void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y)
}
}
void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
bool SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y, bool odd)
{
Polygon* polygon = rp->PolyData;
@ -766,19 +798,8 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
memset(&StencilBuffer[256 * (y&0x1)], 0, 256);
PrevIsShadowMask = true;
if (polygon->YTop != polygon->YBottom)
{
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
{
SetupPolygonLeftEdge(rp, y);
}
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
{
SetupPolygonRightEdge(rp, y);
}
}
CheckSlope(rp, y);
Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend;
@ -787,6 +808,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start;
Interpolator<1>* interp_end;
bool abortscanline; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL;
xend = rp->XR;
@ -870,7 +892,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
// similarly, we can perform alpha test early (checkme)
if (wireframe) polyalpha = 31;
if (polyalpha <= GPU.GPU3D.RenderAlphaRef) return;
if (polyalpha <= GPU.GPU3D.RenderAlphaRef) return false; // TODO: check how this impacts timings?
// in wireframe mode, there are special rules for equal Z (TODO)
@ -880,10 +902,23 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
int edge;
s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr);
xend += 1;
Interpolator<0> interpX(xstart, xend, wl, wr);
if (x < 0) x = 0;
s32 xlimit;
if (xend > 256) xend = 256;
// determine if the span can be rendered within the time allotted to the scanline
// TODO: verify the timing characteristics of shadow masks are the same as regular polygons.
s32 diff = DoTimingsPixels(xend-x, odd);
if (diff != 0)
{
xend -= diff;
r_edgelen -= diff;
abortscanline = true;
}
else abortscanline = false;
// for shadow masks: set stencil bits where the depth test fails.
// draw nothing.
@ -891,8 +926,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
// part 1: left edge
edge = yedge | 0x1;
xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
if (xlimit > xend) xlimit = xend;
if (!l_filledge) x = xlimit;
else
@ -918,9 +952,8 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
// part 2: polygon inside
edge = yedge;
xlimit = xend-r_edgelen+1;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend-r_edgelen;
if (xlimit > xend) xlimit = xend;
if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++)
{
@ -944,8 +977,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
// part 3: right edge
edge = yedge | 0x2;
xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend;
if (r_filledge)
for (; x < xlimit; x++)
@ -967,9 +999,9 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
StencilBuffer[256*(y&0x1) + x] |= 0x2;
}
}
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
Step(rp);
return abortscanline;
}
bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
@ -993,12 +1025,6 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
CheckSlope(rp, y);
if (DoTimings(PerPolyScanline, odd))
{
Step(rp);
return true;
}
Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend;
bool l_filledge, r_filledge;
@ -1006,7 +1032,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start;
Interpolator<1>* interp_end;
bool abortscanline = false; // to abort the rest of the scanline after finishing this polygon
bool abortscanline; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL;
xend = rp->XR;
@ -1142,6 +1168,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
r_edgelen -= diff;
abortscanline = true;
}
else abortscanline = false;
// part 1: left edge
edge = yedge | 0x1;
@ -1434,6 +1461,7 @@ bool SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd)
bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
{
bool abort = false;
bool first = true;
for (int i = 0; i < npolys; i++)
{
RendererPolygon* rp = &PolygonList[i];
@ -1441,20 +1469,29 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
if (y == polygon->YBottom && y != polygon->YTop)
{
if (DoTimings(EmptyPolyScanline, odd)) abort = true;
if (!abort) abort = (first && DoTimings(FirstNull, odd)) || DoTimings(EmptyPolyScanline, odd);
first = false;
}
else if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
{
if (y == polygon->YTop) if(DoTimings(FirstPolyScanline, odd)) abort = true;
//if (y == polygon->YTop) if(DoTimings(FirstPolyScanline, odd)) abort = true;
if (!abort) abort = (first && DoTimingsSlopes(rp, y, odd)) // incorrect. needs research; behavior is strange...
|| DoTimings(PerPolyScanline, odd)
|| (!CheckTimings(MinToStartPoly, odd));
if (abort)
{
CheckSlope(rp, y);
Step(rp);
}
else if (polygon->IsShadowMask)
;//RenderShadowMaskScanline(rp, y);
abort = RenderShadowMaskScanline(rp, y, odd);
else
if (RenderPolygonScanline(rp, y, odd)) abort = true;
abort = RenderPolygonScanline(rp, y, odd);
first = false;
}
}
@ -1500,7 +1537,7 @@ u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr)
return density;
}
void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool odd)
void SoftRenderer::ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool odd, s32 uhohzone)
{
// to consider:
// clearing all polygon fog flags if the master flag isn't set?
@ -1759,10 +1796,13 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
s8 buffersize = 0;
RasterTiming = InitialTiming;
s32 timingadvance = InitialTiming;
bool abort = false;
//u32* RDLinesReg = &GPU.GPU3D.RDLines;
ClearBuffers();
s32 gpu2dtracking = InitGPU2DTimeout;
s32 gpu2dfreetime = InitGPU2DTimeout;
s32 prev2dtime;
bool readodd = true;
for (u8 quarter = 0; quarter < 4; quarter++)
for (u8 bufferline = 0; bufferline < 48; bufferline += 2)
{
@ -1770,13 +1810,19 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
RasterTimingEven = 0;
RasterTiming += ScanlineIncrement;
gpu2dtracking += GPU2DReadSLPair;
if (abort) RasterTiming += AbortIncrement; // if previous scanline was aborted, allow an extra 12 pixels worth of timing
if (y >= 50)
{
if (RasterTiming > Post50Max) RasterTiming = Post50Max;
timingadvance = 0;
buffersize = 48;
gpu2dfreetime = 0;
if (RasterTiming > Post50Max)
{
s32 temp = RasterTiming - Post50Max;
RasterTiming = Post50Max;
gpu2dtracking -= temp;
}
if (buffersize > 48) buffersize = 48;
}
abort = RenderScanline(y, j, true);
@ -1785,50 +1831,59 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
buffersize += 2;
//RasterTiming += ScanlineBreak;
s32 timespent = std::max(RasterTimingOdd, RasterTimingEven);
/*if (timespent > FreeTiming)
{
abort = true;
timespent -= FreeTiming;
}
else if (!abort)
{
abort = false;
timespent -= FreeTiming;
}*/
//if (!abort)
//if (buffersize > 48) timespent -= PerScanlineRecup;
/*else*/
timespent -= FreeTiming;
if (timespent > 0)
{
RasterTiming -= timespent;
timingadvance -= timespent;
}
// measure scanlines being read here.
gpu2dtracking -= timespent;
gpu2dfreetime -= timespent;
if (timingadvance < 0) for (s32 i = (ScanlinePairLength / 2) * buffersize; i > RasterTiming + (ScanlinePairLength / 2); i -= ScanlinePairLength / 2) buffersize -= 1;
if (buffersize < 0) buffersize = 0;
if (timespent > 0) RasterTiming -= timespent;
//if (RasterTiming < 0) RasterTiming = 0;
if (gpu2dfreetime <= 0)
{
buffersize = 0;
if (gpu2dtracking > 0)
{
s32 i = gpu2dtracking;
while (true)
{
s32 comp = GPU2DReadSLPair/2;
//if (readodd) comp = GPU2DSpeedOutsidePair + GPU2DReadScanline;
//else comp = GPU2DSpeedWithinPair + GPU2DReadScanline;
// seems to display the lowest scanline buffer count reached during the current frame.
// we also caps it to 46 here, because this reg does that too for some reason.
if (quarter >= 1 && buffersize < GPU.GPU3D.RDLinesDisplay) GPU.GPU3D.RDLinesDisplay = buffersize;
if (i < comp) break;
i -= comp;
buffersize++;
//readodd = !readodd;
}
if (i > 0) buffersize++;
}
// The register seems to display the lowest scanline buffer count reached during the current frame.
// We also cap it to 46 here, because this register does that too for some reason.
if (GPU.GPU3D.RDLines > buffersize) GPU.GPU3D.RDLines = buffersize;
}
if (prevbufferline >= 0)
{
ScanlineFinalPass(y-2, prevbufferline, true);
ScanlineFinalPass(y-1, prevbufferline+1, false);
ScanlineFinalPass(y-2, prevbufferline, true, prev2dtime);
ScanlineFinalPass(y-1, prevbufferline+1, false, prev2dtime);
}
y += 2;
prevbufferline = bufferline;
prev2dtime = gpu2dtracking;
if (threaded)
Platform::Semaphore_Post(Sema_ScanlineCount);
}
ScanlineFinalPass(190, prevbufferline, true);
ScanlineFinalPass(191, prevbufferline+1, false);
ScanlineFinalPass(190, prevbufferline, true, prev2dtime);
ScanlineFinalPass(191, prevbufferline+1, false, prev2dtime);
if (threaded)
Platform::Semaphore_Post(Sema_ScanlineCount);

View File

@ -455,7 +455,9 @@ private:
melonDS::GPU& GPU;
RendererPolygon PolygonList[2048];
bool DoTimings(s32 cycles, bool odd);
bool CheckTimings(s32 cycles, bool odd);
u32 DoTimingsPixels(s32 pixels, bool odd);
bool DoTimingsSlopes(RendererPolygon* rp, s32 y, bool odd);
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha);
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t);
void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
@ -464,11 +466,11 @@ private:
void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
void Step(RendererPolygon* rp);
void CheckSlope(RendererPolygon* rp, s32 y);
void RenderShadowMaskScanline(RendererPolygon* rp, s32 y);
bool RenderShadowMaskScanline(RendererPolygon* rp, s32 y, bool odd);
bool RenderPolygonScanline(RendererPolygon* rp, s32 y, bool odd);
bool RenderScanline(s32 y, int npolys, bool odd);
u32 CalculateFogDensity(u32 pixeladdr);
void ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool odd);
void ScanlineFinalPass(s32 y, u8 rdbufferoffset, bool odd, s32 uhohzone);
void ClearBuffers();
void RenderPolygons(bool threaded, Polygon** polygons, int npolys);