wip - rewrite 3 - scheduler edition

This commit is contained in:
Jaklyy 2023-12-23 00:38:39 -05:00
parent a338ef1c8a
commit 78da2846e6
3 changed files with 333 additions and 124 deletions

View File

@ -329,18 +329,25 @@ public:
u32 ScrolledLine[256];
};
// rasteriztion timing constants
// Rasterization Timing Constants
static constexpr int TimingFrac = 1; // add a fractional component if pixels is not enough precision
// GPU 2D read timings, for emulating race conditions
// GPU 2D Read Timings: For Emulating Buffer Read/Write Race Conditions
static constexpr int DelayBetweenReads = 809 * TimingFrac;
static constexpr int ScanlineReadSpeed = 256 * TimingFrac;
static constexpr int ScanlineReadInc = DelayBetweenReads + ReadScanline;
static constexpr int GPU2DSpeedWithinPair = 296 * TimingFrac; // the delay between finishing reading the first scanline and beginning reading the second scanline of a scanline pair.
static constexpr int GPU2DSpeedOutsidePair = 810 * TimingFrac; // the delay between finishing reading a pair and beginning reading a new pair.
static constexpr int GPU2DReadScanline = 256 * TimingFrac; // the time it takes to read a scanline.
static constexpr int GPU2DReadSLPair = 1618 * TimingFrac; // notably the same as the scanline increment.
static constexpr int InitGPU2DTimeout = 50000 * TimingFrac; // 51618? | when it starts reading the first scanline.
static constexpr int GPU2D48Scanlines = GPU2DReadSLPair * 48 * TimingFrac; // time to read 48 scanlines.
// GPU 3D rasterization timings, for emulating the timeout
static constexpr int GPU2DSpeedFirstInPair = 810 * TimingFrac; // 810 | the delay between finishing reading a pair and beginning reading a new pair.
static constexpr int GPU2DSpeedSecondInPair = 296 * TimingFrac; // 296 | 295??? | the delay between finishing reading the first scanline
// and beginning reading the second scanline of a scanline pair.
static constexpr int GPU2DReadScanline = 256 * TimingFrac; // 256 | the time it takes to read a scanline.
static constexpr int GPU2DReadSLPair = 1618 * TimingFrac; // 1618 | notably the same as the scanline increment.
static constexpr int InitGPU2DTimeout = 51874 * TimingFrac; // 51618? | when it starts reading the first scanline.
static constexpr int GPU2D48Scanlines = GPU2DReadSLPair * 24; // time to read 48 scanlines.
// GPU 3D Rasterization Timings: For Emulating Scanline Timeout
//static constexpr int ScanlinePairLength = 2130 * TimingFrac;
//static constexpr int ScanlineTimeout = 1686 * TimingFrac; // 2126? 1686?
@ -349,24 +356,36 @@ public:
//static constexpr int FakeTiming = 2 * TimingFrac;
//static constexpr int FraudulentTiming = 1120 * TimingFrac; // bad theory. todo: find a better one.
static constexpr int InitialTiming = 48688 * TimingFrac; // 48688 | add 1618*2 to get the timeout of the second scanline pair
static constexpr int Post50Max = 51116 * TimingFrac; // 51116 | for some reason it doesn't care about how full it actually is, it just cares about if its the first 50 scanlines to speedrun rendering?
static constexpr int FreeTiming = 496 * TimingFrac; // 496 | every scanline has a free 496 pixels worth of timing for some reason.
static constexpr int ScanlineIncrement = 1618 * TimingFrac; // 1618 | how much to regain per scanline pair
static constexpr int AbortIncrement = 12 * TimingFrac; // 12 | how much extra to regain after an aborted scanline (total 1630)
static constexpr int Post50Max = 51116 * TimingFrac; // 51116 | for some reason it doesn't care about how full it actually is,
// it just cares about if its the first 50 scanlines to speedrun rendering?
static constexpr int FinalPassLen = 500 * TimingFrac; // 496 (might technically be 500?) | the next scanline cannot begin while a scanline's final pass is in progress
// (can be interpreted as the minimum amount of cycles for the next scanline
// pair to start after the previous pair began) (related to final pass?)
static constexpr int ScanlineIncrementold = 1618 * TimingFrac; // 1618 | how much to regain per scanline pair
static constexpr int ScanlineIncrement = 2114 * TimingFrac; // 2114 | how much time a scanline pair "gains"
static constexpr int AbortIncrement = 12 * TimingFrac; // 12 | how much extra to regain after an aborted scanline (total 2126)
// (why does the next pair get more time if the previous scanline is aborted?)
static constexpr int UnderflowFlag = 14 * TimingFrac; // 14 | How many cycles need to be left for the 3ddispcnt rdlines underflow flag to be set
static constexpr int RastDelay = 4 * TimingFrac; // 4 | Min amount of cycles to begin a scanline? (minimum time it takes to init the first polygon?)
// (Amount of time before the end of the cycle a scanline must abort?)
static constexpr int FinishScanline = 512 * TimingFrac;
// GPU 3D rasterization timings II, for counting each element with timing characteristics
// GPU 3D Rasterization Timings II: For Tracking Timing Behaviors
//static constexpr int FirstPolyScanline = 0 * TimingFrac;
static constexpr int PerPolyScanline = 12 * TimingFrac; // 12 | should be 12, but 14 is "correct" // should be correct for *most* line polygons and polygons with vertical slopes
static constexpr int PerPolyScanline = 12 * TimingFrac; // 12 | The basic timing cost for polygons. Applies per polygon per scanline.
static constexpr int PerPixelTiming = 1 * TimingFrac; // 1 | 1 pixel = 1 pixel
static constexpr int NumFreePixels = 4; // 4 | First 4 pixels in a polygon scanline are free (for some reason)
static constexpr int MinToStartPoly = 2 * TimingFrac; // 1 | if there is not 1 cycle remaining, do not bother rendering polygon (CHECKME: I dont think this should decrement timings by anything?)
static constexpr int EmptyPolyScanline = 4 * TimingFrac; // - 14; // 4 | seems to be slightly under 4 px?
static constexpr int MinToStartPoly = 2 * TimingFrac; // 1 | if there aren't 2 (why two?) cycles remaining after the polygon timing penalty,
// do not bother rendering the polygon (CHECKME: I dont think this should decrement timings by anything?)
static constexpr int EmptyPolyScanline = 4 * TimingFrac; // 4 | the ignored "empty" bottom-most scanline of a polygon
// which shouldn't be rendered for some reason has timing characteristics.
// GPU 3D rasterization timing III, for first polygon exclusive timing characteristics
// should be done first, as these are "async" pre-calcs of polygon attributes
// GPU 3D Rasterization Timings III, For First Polygon "Pre-Calc" Timings
// should be added before other timings, as these are "async" pre-calcs of polygon attributes
static constexpr int FirstPerSlope = 1 * TimingFrac; // 1 | for each "slope" the first polygon has in this scanline increment it by 1. (see DoTimingsSlopes in GPU3D_Soft.cpp for more info)
static constexpr int FirstPerSlope = 1 * TimingFrac; // 1 | for each "slope" the first polygon has in this scanline increment it by 1.
// (see DoTimingsSlopes() in GPU3D_Soft.cpp for more info)
static constexpr int FirstNull = 1 * TimingFrac; // 1 | if the first polygon is "null" (probably wrong?)
// static constexpr int RasterTimingCap = 51116 * TimingFrac;

View File

@ -114,7 +114,7 @@ void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept
}
}
bool SoftRenderer::DoTimings(GPU3D& gpu3d, s32 cycles, bool odd)
bool SoftRenderer::DoTimings(s32 cycles, bool odd)
{
// add timings to a counter and check if underflowed.
@ -123,9 +123,8 @@ bool SoftRenderer::DoTimings(GPU3D& gpu3d, s32 cycles, bool odd)
else counter = &RasterTimingEven;
*counter += cycles;
if (RasterTiming - *counter > 0) return false;
if (RasterTiming + *counter > ScanlineTimeout) return false;
gpu3d.DispCnt |= (1<<12);
return true;
}
@ -137,11 +136,11 @@ bool SoftRenderer::CheckTimings(s32 cycles, bool odd)
if (odd) counter = &RasterTimingOdd;
else counter = &RasterTimingEven;
if (RasterTiming - *counter >= cycles) return true;
if (ScanlineTimeout - (RasterTiming + *counter) >= cycles) return true;
else return false;
}
u32 SoftRenderer::DoTimingsPixels(GPU3D& gpu3d, s32 pixels, bool odd)
u32 SoftRenderer::DoTimingsPixels(s32 pixels, bool odd)
{
// calculate and return the difference between the old span and the new span, while adding timings to the timings counter
@ -154,27 +153,18 @@ u32 SoftRenderer::DoTimingsPixels(GPU3D& gpu3d, s32 pixels, bool odd)
if (odd) counter = &RasterTimingOdd;
else counter = &RasterTimingEven;
//todo: figure out a faster way to support TimingFrac > 1 without using a for loop somehow. (fingers crossed we dont have to!)
if constexpr (TimingFrac > 1)
for (; pixels > 0; pixels--)
{
*counter += TimingFrac;
if ((RasterTiming - *counter) <= 0) break;
}
else
{
*counter += pixels;
pixels = -(RasterTiming - *counter);
if (pixels > 0) *counter -= pixels;
}
if (pixels <= 0) return 0;
*counter += pixels;
pixels = -(ScanlineTimeout - (RasterTiming + *counter));
gpu3d.DispCnt |= (1<<12);
return pixels;
if (pixels > 0)
{
*counter -= pixels;
return pixels;
}
else return 0;
}
bool SoftRenderer::DoTimingsSlopes(GPU3D& gpu3d, RendererPolygon* rp, s32 y, bool odd)
bool SoftRenderer::DoTimingsSlopes(RendererPolygon* rp, s32 y, bool odd)
{
// determine the timing impact of the first polygon's slopes.
@ -191,7 +181,7 @@ bool SoftRenderer::DoTimingsSlopes(GPU3D& gpu3d, RendererPolygon* rp, s32 y, boo
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) *counter += FirstPerSlope;
return DoTimings(gpu3d, FirstPerSlope*2, odd); // CHECKME: does this need to be done every time its incremented here? does this even need to be done *at all?*
return DoTimings(FirstPerSlope*2, odd); // CHECKME: does this need to be done every time its incremented here? does this even need to be done *at all?*
}
void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const
@ -778,7 +768,7 @@ void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y)
}
}
bool SoftRenderer::RenderShadowMaskScanline(GPU3D& gpu3d, RendererPolygon* rp, s32 y, bool odd)
bool SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, bool odd)
{
Polygon* polygon = rp->PolyData;
@ -913,7 +903,7 @@ bool SoftRenderer::RenderShadowMaskScanline(GPU3D& gpu3d, RendererPolygon* rp, s
// determine if the span can be rendered within the time allotted to the scanline
// TODO: verify the timing characteristics of shadow masks are the same as regular polygons.
s32 diff = DoTimingsPixels(gpu3d, xend-x, odd);
s32 diff = DoTimingsPixels(xend-x, odd);
if (diff != 0)
{
xend -= diff;
@ -934,7 +924,7 @@ bool SoftRenderer::RenderShadowMaskScanline(GPU3D& gpu3d, RendererPolygon* rp, s
else
for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x);
@ -959,7 +949,7 @@ bool SoftRenderer::RenderShadowMaskScanline(GPU3D& gpu3d, RendererPolygon* rp, s
if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x);
@ -984,7 +974,7 @@ bool SoftRenderer::RenderShadowMaskScanline(GPU3D& gpu3d, RendererPolygon* rp, s
if (r_filledge)
for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x);
@ -1006,7 +996,7 @@ bool SoftRenderer::RenderShadowMaskScanline(GPU3D& gpu3d, RendererPolygon* rp, s
return abortscanline;
}
bool SoftRenderer::RenderPolygonScanline(GPU& gpu, RendererPolygon* rp, s32 y, bool odd)
bool SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, bool odd)
{
Polygon* polygon = rp->PolyData;
u32 polyattr = (polygon->Attr & 0x3F008000);
@ -1163,7 +1153,7 @@ bool SoftRenderer::RenderPolygonScanline(GPU& gpu, RendererPolygon* rp, s32 y, b
if (xend > 256) xend = 256;
// determine if the span can be rendered within the time allotted to the scanline
s32 diff = DoTimingsPixels(gpu.GPU3D, xend-x, odd);
s32 diff = DoTimingsPixels(xend-x, odd);
if (diff != 0)
{
xend -= diff;
@ -1460,7 +1450,7 @@ bool SoftRenderer::RenderPolygonScanline(GPU& gpu, RendererPolygon* rp, s32 y, b
return abortscanline;
}
bool SoftRenderer::RenderScanline(GPU& gpu, s32 y, int npolys, bool odd)
bool SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys, bool odd)
{
bool abort = false;
bool first = true;
@ -1471,16 +1461,16 @@ bool SoftRenderer::RenderScanline(GPU& gpu, s32 y, int npolys, bool odd)
if (y == polygon->YBottom && y != polygon->YTop)
{
if (!abort) abort = (first && DoTimings(gpu.GPU3D, FirstNull, odd)) || DoTimings(gpu.GPU3D, EmptyPolyScanline, odd);
if (!abort) abort = (first && DoTimings(FirstNull, odd)) || DoTimings(EmptyPolyScanline, odd);
first = false;
}
else if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
{
//if (y == polygon->YTop) if(DoTimings(gpu.GPU3D, FirstPolyScanline, odd)) abort = true;
//if (y == polygon->YTop) if(DoTimings(FirstPolyScanline, odd)) abort = true;
if (!abort) abort = (first && DoTimingsSlopes(gpu.GPU3D, rp, y, odd)) // incorrect. needs research; behavior is strange...
|| DoTimings(gpu.GPU3D, PerPolyScanline, odd)
if (!abort) abort = (first && DoTimingsSlopes(rp, y, odd)) // incorrect. needs research; behavior is strange...
|| DoTimings(PerPolyScanline, odd)
|| (!CheckTimings(MinToStartPoly, odd));
if (abort)
@ -1539,7 +1529,7 @@ u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const
return density;
}
void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y, u8 rdbufferoffset, bool odd, s32 uhohzone)
void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
{
// to consider:
// clearing all polygon fog flags if the master flag isn't set?
@ -1707,18 +1697,6 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y, u8 rdbufferoffse
ColorBuffer[pixeladdr] = topR | (topG << 8) | (topB << 16) | (topA << 24);
}
}
// if the first two scanlines are late then it's delayed by 48 scanlines
if (false)//late)
{
memcpy(&FinalBuffer[y*ScanlineWidth], &RDBuffer[rdbufferoffset*ScanlineWidth], 4 * ScanlineWidth);
memcpy(&RDBuffer[rdbufferoffset*ScanlineWidth], &ColorBuffer[y*ScanlineWidth], 4 * ScanlineWidth);
}
else
{
memcpy(&RDBuffer[rdbufferoffset*ScanlineWidth], &ColorBuffer[y*ScanlineWidth], 4 * ScanlineWidth);
memcpy(&FinalBuffer[y*ScanlineWidth], &RDBuffer[rdbufferoffset*ScanlineWidth], 4 * ScanlineWidth);
}
}
void SoftRenderer::ClearBuffers(const GPU& gpu)
@ -1784,7 +1762,50 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
}
}
void SoftRenderer::RenderPolygons(GPU& gpu, bool threaded, Polygon** polygons, int npolys)
u16 SoftRenderer::BeginPushScanline(s32 y, s32 pixelstodraw)
{
// push the finished scanline to the appropriate frame buffers.
// if a scanline is late enough to intersect with the 2d engine read time it will be partially drawn
u16 start;
if (pixelstodraw > 256)
{
start = 0;
pixelstodraw = 256;
}
else if (pixelstodraw <= 0)
{
return 256;
}
else
{
start = ScanlineWidth - pixelstodraw;
// it seems to read in pairs of two every two cycles? looks jittery
bool jitter = pixelstodraw % 2;
// chcckme: + & - might be backwards
pixelstodraw += jitter;
start -= jitter;
}
bufferpos = y % 48;
memcpy(&RDBuffer[bufferpos*ScanlineWidth+start], &ColorBuffer[y*ScanlineWidth+start], 4 * pixelstodraw);
return start;
}
void SoftRenderer::ReadScanline(s32 y)
{
memcpy(&FinalBuffer[y*ScanlineWidth], &RDBuffer[bufferpos*ScanlineWidth], 4 * ScanlineWidth);
}
void SoftRenderer::FinishPushScanline(s32 y s32 pixelsremain)
{
if (pixelsremain = 0) return;
bufferpos = y % 48;
memcpy(&RDBuffer[bufferpos*ScanlineWidth], &ColorBuffer[y*ScanlineWidth], 4 * pixelsremain);
}
template <bool threaded>
void SoftRenderer::RenderPolygons(GPU& gpu, Polygon** polygons, int npolys)
{
int j = 0;
for (int i = 0; i < npolys; i++)
@ -1792,16 +1813,186 @@ void SoftRenderer::RenderPolygons(GPU& gpu, bool threaded, Polygon** polygons, i
if (polygons[i]->Degenerate) continue;
SetupPolygon(&PolygonList[j++], polygons[i]);
}
ClearBuffers(gpu);
s32 rasterevents[RasterEvents_MAX];
s32 y = 0;
s32 yold;
rasterevents[RenderStart] = 0;
rasterevents[RenderFinal] = INT_MAX/2;
rasterevents[RenderFinalP2] = INT_MAX;
rasterevents[ScanlineWrite] = INT_MAX;
rasterevents[ScanlineRead] = InitGPU2DTimeout;
ScanlineTimeout = INT_MAX;
RasterTiming = 0;
RasterTimingEven = 0;
RasterTimingOdd = 0;
u8 scanlinesread = 0
u8 scanlinesrendered;
s8 scanlineswaiting = 0;
u8 nextevent;
u16 leftoversa;
u16 leftoversb;
bool finalunsched = true;
while (scanlinesread < 192)
{
nextevent = 0;
for (int i = 1; i < RasterEvents_MAX - finalunsched; i++)
{
if (rasterevents[nextevent] > rasterevents[i])
nextevent = i;
}
switch (nextevent)
{
case RenderStart:
bool abort = RenderScanline(gpu, y, j, true);
abort |= RenderScanline(gpu, y+1, j, false);
timespent = std::max(RasterTimingEven, RasterTimingOdd);
RasterTiming += timespent;
if ((RasterTiming + timespent) < (rasterevents[RenderFinal]+FinalPassLen))
RasterTiming += FinalPassLen;
else
RasterTiming += timespent;
s32 timeoutdist = ScanlineTimeout - RasterTiming;
RasterTiming += std::clamp(timeoutdist, 0, 12);
rasterevents[RenderFinal] = RasterTiming;
rasterevents[RenderScanline] = RasterTiming+RastDelay;
finalunsched = false;
break;
case RenderFinal:
if (y > 2)
{
ScanlineFinalPass(gpu.GPU3D, y-1);
leftoversa = BeginPushScanline(y-1, (rasterevents[ScanlineRead] - ScanlineReadSpeed) - (rasterevents[RenderFinal] + FinalPassLen));
if (leftoversa != 0)
{
rasterevents[RenderFinalP2] = rasterevents[ScanlineRead] - ScanlineReadSpeed;
yold = y;
}
else
{
scanlineswaiting++;
scanlinesrendered++;
}
}
if (y < 192)
{
ScanlineFinalPass(gpu.GPU3D, y);
leftoversb = BeginPushScanline(y, (rasterevents[ScanlineRead] + DelayBetweenReads) - (rasterevents[RenderFinal] + FinalPassLen));
if (leftoversb != 0)
{
rasterevents[RenderFinalP2] = rasterevents[ScanlineRead] + DelaybetweenReads;
yold = y;
}
else
{
scanlineswaiting++;
scanlinesrendered++;
}
finalunsched = true;
}
else
{
rasterevents[RenderFinal] += FinalPassLen;
}
y += 2;
break;
case ScanlineRead:
ReadScanline(scanlinesread);
rasterevents[ScanlineRead] += ScanlineIncrement;
if constexpr (threaded)
Platform::Semaphore_Post(Sema_ScanlineCount);
scanlinesread++;
scanlineswaiting--;
break;
case RenderFinalP2:
if (y > 2)
{
FinishPushScanline(yold-1, leftoversa);
scanlineswaiting++;
scanlinesrendered++;
}
if (y < 192)
{
FinishPushScanline(yold, leftoversb);
scanlineswaiting++;
scanlinesrendered++;
}
rasterevents[RenderFinalP2] = INT_MAX;
break;
}
}
}
/*ScanlineRead = InitGPU2DTimeout;
ScanlineTimeout = INT_MAX;
RasterTiming = 0;
s32 prevscanlineread;
s32 prevrastertiming;
for (y = 0; y < 192; y += 2)
{
RasterTimingEven = 0;
RasterTimingOdd = 0;
// scanlines are rendered in pairs simultaneously
bool abort = RenderScanline(gpu, y, j, true);
abort |= RenderScanline(gpu, y+1, j, false);
timespent = std::max(RasterTimingEven, RasterTimingOdd);
if (timespend > FreeTiming)
RasterTiming += timespent;
// the next loop begins
if (y!=0)
{
// finish second scanline from 2 pairs back
ScanlineFinalPass(gpu.GPU3D, y-1);
PushScanline(y-1, (ScanlineRead+GPU2DReadScanline)-(RasterTiming+FinishScanline));
ScanlineRead += ScanlineReadInc;
if constexpr (threaded) Platform::Semaphore_Post(Sema_ScanlineCount);
}
// finish previous first scanline
ScanlineFinalPass(gpu.GPU3D, y);
PushScanline(y, (ScanlineRead+GPU2DReadScanline)-(RasterTiming+FinishScanline));
ScanlineRead += ScanlineReadInc;
if constexpr (threaded) Platform::Semaphore_Post(Sema_ScanlineCount);
y += 2;
}
RasterTiming +=
// one more loop just to finish off the final scanline
ScanlineFinalPass(gpu.GPU3D, 191);
PushScanline(191, (ScanlineRead+GPU2DReadScanline)-(RasterTiming+FinishScanline));
if constexpr (threaded) Platform::Semaphore_Post(Sema_ScanlineCount);
*/
/*s32 y = 0;
s8 prevbufferline = -2;
s8 buffersize = 0;
RasterTiming = InitialTiming;
RasterTiming = INT_MAX/2;
bool abort = false;
ClearBuffers(gpu);
s32 gpu2dtracking = InitGPU2DTimeout;
s32 gpu2dfreetime = InitGPU2DTimeout;
s32 prev2dtime;
bool readodd = true;
@ -1811,13 +2002,14 @@ void SoftRenderer::RenderPolygons(GPU& gpu, bool threaded, Polygon** polygons, i
RasterTimingOdd = 0;
RasterTimingEven = 0;
if (y == 2) RasterTiming = InitialTiming;
RasterTiming += ScanlineIncrement;
gpu2dtracking += GPU2DReadSLPair;
if (abort) RasterTiming += AbortIncrement; // if previous scanline was aborted, allow an extra 12 pixels worth of timing
if (y >= 50)
{
gpu2dfreetime = 0;
if (RasterTiming > Post50Max)
{
s32 temp = RasterTiming - Post50Max;
@ -1834,62 +2026,49 @@ void SoftRenderer::RenderPolygons(GPU& gpu, bool threaded, Polygon** polygons, i
//RasterTiming += ScanlineBreak;
s32 timespent = std::max(RasterTimingOdd, RasterTimingEven);
if (RasterTiming - timespent <= UnderflowFlag) gpu.GPU3D.DispCnt |= (1<<12); // checkme: should this flag set itself every frame a "underflowed" frame is rendered, even if said frame is duplicated?
timespent -= FreeTiming;
// measure scanlines being read here.
gpu2dtracking -= timespent;
gpu2dfreetime -= timespent;
if (timespent > 0)
{
RasterTiming -= timespent;
gpu2dtracking -= timespent;
}
if (timespent > 0) RasterTiming -= timespent;
//if (RasterTiming < 0) RasterTiming = 0;
if (gpu2dfreetime <= 0)
{
buffersize = 0;
if (gpu2dtracking > 0)
{
s32 i = gpu2dtracking;
while (true)
{
s32 comp = GPU2DReadSLPair/2;
//if (readodd) comp = GPU2DSpeedOutsidePair + GPU2DReadScanline;
//else comp = GPU2DSpeedWithinPair + GPU2DReadScanline;
if (i < comp) break;
i -= comp;
buffersize++;
//readodd = !readodd;
}
buffersize = 0;
for (int i = gpu2dtracking; i > 0; i -= GPU2DReadSLPair/2) buffersize++;
if (i > 0) buffersize++;
}
// seems to display the lowest scanline buffer count reached during the current frame.
// we also caps it to 46 here, because this reg does that too for some reason.
if (gpu.GPU3D.RDLines > buffersize) gpu.GPU3D.RDLines = buffersize;
}
if (buffersize < gpu.GPU3D.RDLines) gpu.GPU3D.RDLines = buffersize;
if (prevbufferline >= 0)
{
ScanlineFinalPass(gpu.GPU3D, y-2, prevbufferline, true, prev2dtime);
ScanlineFinalPass(gpu.GPU3D, y-1, prevbufferline+1, false, prev2dtime);
if (threaded)
{
Platform::Semaphore_Post(Sema_ScanlineCount);
Platform::Semaphore_Post(Sema_ScanlineCount);
}
}
y += 2;
prevbufferline = bufferline;
prev2dtime = gpu2dtracking;
if (threaded)
Platform::Semaphore_Post(Sema_ScanlineCount);
}
ScanlineFinalPass(gpu.GPU3D, 190, prevbufferline, true, prev2dtime);
ScanlineFinalPass(gpu.GPU3D, 191, prevbufferline+1, false, prev2dtime);
if (threaded)
{
Platform::Semaphore_Post(Sema_ScanlineCount);
}
Platform::Semaphore_Post(Sema_ScanlineCount);
}
}*/
void SoftRenderer::VCount144(GPU& gpu)
{
@ -1911,7 +2090,7 @@ void SoftRenderer::RenderFrame(GPU& gpu)
{
Platform::Semaphore_Post(Sema_RenderStart);
}
else if (!FrameIdentical) RenderPolygons(gpu, false, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
else if (!FrameIdentical) RenderPolygons<false>(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
}
void SoftRenderer::RestartFrame(GPU& gpu)
@ -1931,7 +2110,7 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
{
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
}
else RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
else RenderPolygons<true>(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
Platform::Semaphore_Post(Sema_RenderDone);
RenderThreadRendering = false;
@ -1946,7 +2125,7 @@ u32* SoftRenderer::GetLine(int line)
Platform::Semaphore_Wait(Sema_ScanlineCount);
}
return &FinalBuffer[(line * ScanlineWidth) + FirstPixelOffset];
return &FinalBuffer[line * ScanlineWidth];
}
}

View File

@ -453,10 +453,10 @@ private:
};
RendererPolygon PolygonList[2048];
bool DoTimings(GPU3D& gpu3d, s32 cycles, bool odd);
bool DoTimings(s32 cycles, bool odd);
bool CheckTimings(s32 cycles, bool odd);
u32 DoTimingsPixels(GPU3D& gpu3d, s32 pixels, bool odd);
bool DoTimingsSlopes(GPU3D& gpu3d, RendererPolygon* rp, s32 y, bool odd);
u32 DoTimingsPixels(s32 pixels, bool odd);
bool DoTimingsSlopes(RendererPolygon* rp, s32 y, bool odd);
void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const;
u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const;
void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
@ -465,21 +465,31 @@ private:
void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
void Step(RendererPolygon* rp);
void CheckSlope(RendererPolygon* rp, s32 y);
bool RenderShadowMaskScanline(GPU3D& gpu3d, RendererPolygon* rp, s32 y, bool odd);
bool RenderPolygonScanline(GPU& gpu, RendererPolygon* rp, s32 y, bool odd);
bool RenderScanline(GPU& gpu, s32 y, int npolys, bool odd);
bool RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, bool odd);
bool RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, bool odd);
bool RenderScanline(const GPU& gpu, s32 y, int npolys, bool odd);
u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const;
void ScanlineFinalPass(const GPU3D& gpu3d, s32 y, u8 rdbufferoffset, bool odd, s32 uhohzone);
void ClearBuffers(const GPU& gpu);
void RenderPolygons(GPU& gpu, bool threaded, Polygon** polygons, int npolys);
template <bool threaded> void RenderPolygons(GPU& gpu, Polygon** polygons, int npolys);
voi PushScanline(s32 y, s32 pixelstodraw);
void RenderThreadFunc(GPU& gpu);
// counters for scanline rasterization timings
s32 RasterTiming = 0;
//s32 RasterTimingCounterPrev = 0;
s32 RasterTimingOdd = 0;
s32 RasterTimingEven = 0;
s32 ScanlineTimeout;
s32 RasterTiming;
s32 RasterTimingOdd;
s32 RasterTimingEven;
enum
{
RenderStart = 0,
ScanlineRead,
RenderFinalP2,
RenderFinal,
RasterEvents_MAX,
};
// buffer dimensions are 258x194 to add a offscreen 1px border
// which simplifies edge marking tests
@ -488,17 +498,18 @@ private:
// offscreen in that border
static constexpr int ScanlineWidth = 256;
static constexpr int NumScanlines = 192;
static constexpr int NumScanlinesIntBuf = 192;
static constexpr int NumScanlinesRD = 48;
static constexpr int NumScanlinesFinal = 192;
static constexpr int BufferSize = ScanlineWidth * NumScanlinesIntBuf;
static constexpr int RDBufferSize = ScanlineWidth * NumScanlinesRD;
static constexpr int BufferSize = ScanlineWidth * NumScanlines;
static constexpr int FirstPixelOffset = 0;
static constexpr int FinalBufferSize = ScanlineWidth * NumScanlinesFinal;
u32 ColorBuffer[BufferSize * 2];
u32 DepthBuffer[BufferSize * 2];
u32 AttrBuffer[BufferSize * 2];
u32 RDBuffer[RDBufferSize];
u32 FinalBuffer[BufferSize];
u32 RDBuffer[RDBufferSize]; // is this buffer ever initialized by hw before writing to it? what is its initial value? can you transfer 3d framebuffer data between games?
u32 FinalBuffer[FinalBufferSize];
// attribute buffer:
// bit0-3: edge flags (left/right/top/bottom)