mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2024-11-14 05:17:40 -07:00
improved timings for the first 50 scanlines
This commit is contained in:
parent
24eecec50f
commit
a46316d71f
@ -222,8 +222,7 @@ void GPU3D::Reset() noexcept
|
||||
AlphaRefVal = 0;
|
||||
AlphaRef = 0;
|
||||
|
||||
RDLines = 46;
|
||||
RDLinesMin = 46;
|
||||
RDLinesDisplay = 46;
|
||||
|
||||
memset(ToonTable, 0, sizeof(ToonTable));
|
||||
memset(EdgeTable, 0, sizeof(EdgeTable));
|
||||
@ -2370,7 +2369,7 @@ void GPU3D::CheckFIFODMA() noexcept
|
||||
|
||||
void GPU3D::VCount144() noexcept
|
||||
{
|
||||
RDLinesMin = 46;
|
||||
RDLinesDisplay = 46;
|
||||
CurrentRenderer->VCount144();
|
||||
}
|
||||
|
||||
@ -2614,7 +2613,7 @@ u16 GPU3D::Read16(u32 addr) noexcept
|
||||
return DispCnt;
|
||||
|
||||
case 0x04000320:
|
||||
return RDLines; // IT IS TIME
|
||||
return RDLinesDisplay; // IT IS TIME
|
||||
|
||||
case 0x04000600:
|
||||
{
|
||||
@ -2658,7 +2657,7 @@ u32 GPU3D::Read32(u32 addr) noexcept
|
||||
return DispCnt;
|
||||
|
||||
case 0x04000320:
|
||||
return RDLines; // IT IS TIME
|
||||
return RDLinesDisplay; // IT IS TIME
|
||||
|
||||
case 0x04000600:
|
||||
{
|
||||
|
21
src/GPU3D.h
21
src/GPU3D.h
@ -245,8 +245,7 @@ public:
|
||||
bool RenderingEnabled = false;
|
||||
|
||||
u32 DispCnt = 0;
|
||||
u32 RDLines = 0;
|
||||
u32 RDLinesMin = 0;
|
||||
u32 RDLinesDisplay = 0;
|
||||
u8 AlphaRefVal = 0;
|
||||
u8 AlphaRef = 0;
|
||||
|
||||
@ -334,13 +333,21 @@ public:
|
||||
// rasterization timing constants
|
||||
static constexpr int TimingFrac = 1; // add a fractional component if pixels is not enough precision
|
||||
|
||||
// GPU 2D read timings, for emulating race conditions
|
||||
static constexpr int GPU2DSpeedWithinPair = 296 * TimingFrac;
|
||||
static constexpr int GPU2DSpeedOutsidePair = 948 * TimingFrac;
|
||||
static constexpr int ScanlinePairLength = 2130 * TimingFrac;
|
||||
static constexpr int ScanlineTimeout = 2126 * TimingFrac;
|
||||
static constexpr int GPU2DSpeedOutsidePair = 810 * TimingFrac;
|
||||
static constexpr int GPU2DSpeedReadScanline = 256 * TimingFrac;
|
||||
static constexpr int InitGPU2DTimeout = 51618 * TimingFrac;
|
||||
static constexpr int ScanlineBreak = 4 * TimingFrac;
|
||||
|
||||
// GPU 3D rasterization timings, for emulating the timeout
|
||||
static constexpr int ScanlinePairLength = 2130 * TimingFrac;
|
||||
static constexpr int ScanlineTimeout = 1686 * TimingFrac; // 2126? 1686?
|
||||
static constexpr int ScanlineBreak = 4 * TimingFrac;
|
||||
static constexpr int ScanlineBreak2 = 40 * TimingFrac;
|
||||
static constexpr int IncrementStrange = 1618 * TimingFrac; // 1882? 1442? 1618??
|
||||
static constexpr int FreeTiming = 440 * TimingFrac;
|
||||
|
||||
// GPU 3D rasterization timings II, for counting each element with timing characteristics
|
||||
static constexpr int PerPolyTiming = 12 * TimingFrac; // should be correct for *most* line polygons and polygons with vertical slopes
|
||||
static constexpr int PerPixelTiming = 1 * TimingFrac; // does not apply to the first 4 pixels in a polygon (per scanline?)
|
||||
|
||||
@ -348,7 +355,7 @@ public:
|
||||
static constexpr int PerScanlineTiming = 1064 * TimingFrac; // approximate currently, used to calc RDLines. TEMPORARY UNTIL ACCURATE "FRAMEBUFFER" CAN BE IMPLEMENTED
|
||||
static constexpr int PerScanlineRecup = 2112 * TimingFrac; // seems to check out? // should be the "free" time the gpu has to do the calculation
|
||||
static constexpr int PerRightSlope = 1 * TimingFrac;
|
||||
static constexpr int EmptyPolyScanline = 4 * TimingFrac - 14; // seems to be slightly under 4?
|
||||
static constexpr int EmptyPolyScanline = 4 * TimingFrac;// - 14; // seems to be slightly under 4?
|
||||
//static constexpr int FirstPixelTiming;
|
||||
|
||||
class Renderer3D
|
||||
|
@ -121,44 +121,43 @@ bool SoftRenderer::DoTimings(s32 cycles, bool odd)
|
||||
else counter = &RasterTimingEven;
|
||||
|
||||
*counter += cycles;
|
||||
if (RasterTiming - *counter) return false;
|
||||
if (RasterTiming - *counter > 0) return false;
|
||||
|
||||
GPU.GPU3D.DispCnt |= (1<<12);
|
||||
return true;
|
||||
}
|
||||
|
||||
u32 SoftRenderer::DoTimingsPixels(u32 pixels, bool odd)
|
||||
u32 SoftRenderer::DoTimingsPixels(s32 pixels, bool odd)
|
||||
{
|
||||
// calculate and return the difference between the old span and the new span, while adding timings to the timings counter
|
||||
|
||||
// pixels don't count towards timings if they're the first 4 pixels in a scanline (for some reason?)
|
||||
if (pixels <= 4) return 0;
|
||||
|
||||
u32 pixelsremain = pixels-4;
|
||||
pixels -= 4;
|
||||
|
||||
s32* counter;
|
||||
if (odd) counter = &RasterTimingOdd;
|
||||
else counter = &RasterTimingEven;
|
||||
|
||||
//todo: do this without a for loop somehow.
|
||||
for (; pixelsremain > 0; pixelsremain--)
|
||||
//todo: figure out a faster way to support TimingFrac > 1 without using a for loop somehow.
|
||||
if constexpr (TimingFrac > 1)
|
||||
for (; pixels > 0; pixels--)
|
||||
{
|
||||
*counter += TimingFrac;
|
||||
if ((RasterTiming - *counter) <= 0) break;
|
||||
}
|
||||
else
|
||||
{
|
||||
*counter += TimingFrac;
|
||||
if (!(RasterTiming - *counter)) break;
|
||||
*counter += pixels;
|
||||
pixels = -(RasterTiming - *counter);
|
||||
if (pixels > 0) *counter -= pixels;
|
||||
}
|
||||
|
||||
if (pixelsremain <= 0) return 0;
|
||||
if (pixels <= 0) return 0;
|
||||
|
||||
GPU.GPU3D.DispCnt |= (1<<12);
|
||||
return pixelsremain;
|
||||
}
|
||||
|
||||
void SoftRenderer::EndScanline(bool odd)
|
||||
{
|
||||
if (!odd)
|
||||
{
|
||||
RasterTiming += std::max(RasterTimingOdd, RasterTimingEven);
|
||||
}
|
||||
return pixels;
|
||||
}
|
||||
|
||||
void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
|
||||
@ -1458,7 +1457,6 @@ bool SoftRenderer::RenderScanline(s32 y, int npolys, bool odd)
|
||||
}
|
||||
}
|
||||
|
||||
EndScanline(odd);
|
||||
return abort;
|
||||
}
|
||||
|
||||
@ -1758,30 +1756,59 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
|
||||
s32 y = 0;
|
||||
s8 prevbufferline = -2;
|
||||
|
||||
u8 buffersize = 0;
|
||||
RasterTiming = (ScanlinePairLength * 24);
|
||||
RasterTimingOdd = 0;
|
||||
RasterTimingEven = 0;
|
||||
|
||||
s8 buffersize = 0;
|
||||
RasterTiming = InitialTiming;
|
||||
s32 timingadvance = InitialTiming;
|
||||
bool abort = false;
|
||||
//u32* RDLinesReg = &GPU.GPU3D.RDLines;
|
||||
ClearBuffers();
|
||||
for (u8 quarter = 0; quarter < 4; quarter++)
|
||||
for (u8 bufferline = 0; bufferline < 48; bufferline += 2)
|
||||
{
|
||||
RasterTiming += (ScanlineTimeout);
|
||||
RasterTimingOdd = 0;
|
||||
RasterTimingEven = 0;
|
||||
|
||||
if (buffersize > 48)
|
||||
{
|
||||
RasterTiming = ScanlinePairLength * 23;
|
||||
timingadvance = 0;
|
||||
buffersize = 48;
|
||||
}
|
||||
if (!abort) RasterTiming += IncrementStrange;
|
||||
else RasterTiming += ScanlineTimeout;
|
||||
|
||||
if (buffersize >= 50) RasterTiming = (ScanlinePairLength * 23) + ScanlineTimeout;
|
||||
abort = RenderScanline(y, j, true);
|
||||
abort = RenderScanline(y+1, j, false);
|
||||
|
||||
RenderScanline(y, j, true);
|
||||
RenderScanline(y+1, j, false);
|
||||
RasterTiming += ScanlineBreak;
|
||||
buffersize += 2;
|
||||
//RasterTiming += ScanlineBreak;
|
||||
s32 timespent = std::max(RasterTimingOdd, RasterTimingEven);
|
||||
|
||||
u32* RDLinesReg = &GPU.GPU3D.RDLines;
|
||||
*RDLinesReg = 0;
|
||||
for (int i = RasterTiming; i > ScanlinePairLength / 2; i -= ScanlinePairLength / 2) *RDLinesReg += 1;
|
||||
/*if (timespent > FreeTiming)
|
||||
{
|
||||
abort = true;
|
||||
timespent -= FreeTiming;
|
||||
}
|
||||
else if (!abort)
|
||||
{
|
||||
abort = false;
|
||||
timespent -= FreeTiming;
|
||||
}*/
|
||||
//if (!abort)
|
||||
timespent -= FreeTiming;
|
||||
|
||||
if (timespent > 0)
|
||||
{
|
||||
RasterTiming -= timespent;
|
||||
timingadvance -= timespent;
|
||||
}
|
||||
|
||||
if (timingadvance < 0) for (s32 i = (ScanlinePairLength / 2) * buffersize; i > RasterTiming + (ScanlinePairLength / 2); i -= ScanlinePairLength / 2) buffersize -= 1;
|
||||
if (buffersize < 0) buffersize = 0;
|
||||
|
||||
// seems to display the lowest scanline buffer count reached during the current frame.
|
||||
// we also cap it to 46 here, because this reg does that too for some reason.
|
||||
if (*RDLinesReg > GPU.GPU3D.RDLinesMin) *RDLinesReg = GPU.GPU3D.RDLinesMin;
|
||||
else if (*RDLinesReg < GPU.GPU3D.RDLinesMin) GPU.GPU3D.RDLinesMin = *RDLinesReg;
|
||||
if (quarter >= 1 && buffersize < GPU.GPU3D.RDLinesDisplay) GPU.GPU3D.RDLinesDisplay = buffersize;
|
||||
|
||||
if (prevbufferline >= 0)
|
||||
{
|
||||
|
@ -455,8 +455,7 @@ private:
|
||||
melonDS::GPU& GPU;
|
||||
RendererPolygon PolygonList[2048];
|
||||
bool DoTimings(s32 cycles, bool odd);
|
||||
u32 DoTimingsPixels(u32 pixels, bool odd);
|
||||
void EndScanline(bool odd);
|
||||
u32 DoTimingsPixels(s32 pixels, bool odd);
|
||||
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha);
|
||||
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t);
|
||||
void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
|
||||
|
Loading…
Reference in New Issue
Block a user