This commit is contained in:
Jakly 2024-11-12 12:56:34 +01:00 committed by GitHub
commit 27d4d516c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 584 additions and 151 deletions

View File

@ -30,7 +30,7 @@ namespace melonDS
using Platform::Log; using Platform::Log;
using Platform::LogLevel; using Platform::LogLevel;
#define LINE_CYCLES (355*6) #define LINE_CYCLES (355*6)
#define HBLANK_CYCLES (48+(256*6)) #define HBLANK_CYCLES (48+(256*6))
#define FRAME_CYCLES (LINE_CYCLES * 263) #define FRAME_CYCLES (LINE_CYCLES * 263)
@ -879,6 +879,11 @@ void GPU::StartHBlank(u32 line) noexcept
DispStat[0] |= (1<<1); DispStat[0] |= (1<<1);
DispStat[1] |= (1<<1); DispStat[1] |= (1<<1);
// TODO: not quite the correct update time, but... close enough i guess?
int scanline = (VCount == 262 ? 0 : (line+1));
if (!(scanline & 1)) GPU3D.ScanlineSync(scanline);
if (GPU3D.UnderflowFlagVCount == scanline) GPU3D.DispCnt |= (1<<12);
if (VCount < 192) if (VCount < 192)
{ {
// draw // draw
@ -1013,11 +1018,11 @@ void GPU::StartScanline(u32 line) noexcept
{ {
if (VCount == 192) if (VCount == 192)
{ {
// in reality rendering already finishes at line 144 // in reality rendering already finishes at line 144 (can take up to ~191 depending on load)
// and games might already start to modify texture memory. // and games might already start to modify texture memory.
// That doesn't matter for us because we cache the entire // That doesn't matter for us because we cache the entire
// texture memory anyway and only update it before the start // texture memory anyway and only update it before the start
//of the next frame. // of the next frame.
// So we can give the rasteriser a bit more headroom // So we can give the rasteriser a bit more headroom
GPU3D.VCount144(*this); GPU3D.VCount144(*this);

View File

@ -183,6 +183,8 @@ void GPU3D::ResetRenderingState() noexcept
RenderClearAttr1 = 0x3F000000; RenderClearAttr1 = 0x3F000000;
RenderClearAttr2 = 0x00007FFF; RenderClearAttr2 = 0x00007FFF;
RenderFrameIdentical = false;
} }
void GPU3D::Reset() noexcept void GPU3D::Reset() noexcept
@ -236,7 +238,7 @@ void GPU3D::Reset() noexcept
TotalParams = 0; TotalParams = 0;
GeometryEnabled = false; GeometryEnabled = false;
RenderingEnabled = false; RenderingEnabled = 0;
DispCnt = 0; DispCnt = 0;
AlphaRefVal = 0; AlphaRefVal = 0;
@ -255,6 +257,9 @@ void GPU3D::Reset() noexcept
ResetRenderingState(); ResetRenderingState();
UnderflowFlagVCount = -1;
RDLines = 63;
AbortFrame = false; AbortFrame = false;
Timestamp = 0; Timestamp = 0;
@ -549,12 +554,16 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Bool32(&AbortFrame); file->Bool32(&AbortFrame);
file->Bool32(&GeometryEnabled); file->Bool32(&GeometryEnabled);
file->Bool32(&RenderingEnabled); file->Var8(&RenderingEnabled);
file->Var32(&PolygonMode); file->Var32(&PolygonMode);
file->Var32(&PolygonAttr); file->Var32(&PolygonAttr);
file->Var32(&CurPolygonAttr); file->Var32(&CurPolygonAttr);
file->Var32(&TexParam); file->Var32(&TexParam);
file->Var32(&TexPalette); file->Var32(&TexPalette);
file->Var8(&RDLines);
file->Var8(&RDLinesTemp);
RenderFrameIdentical = false; RenderFrameIdentical = false;
if (softRenderer && softRenderer->IsThreaded()) if (softRenderer && softRenderer->IsThreaded())
{ {
@ -567,9 +576,19 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
void GPU3D::SetEnabled(bool geometry, bool rendering) noexcept void GPU3D::SetEnabled(bool geometry, bool rendering) noexcept
{ {
GeometryEnabled = geometry; GeometryEnabled = geometry;
RenderingEnabled = rendering; if (rendering)
{
if (!rendering) ResetRenderingState(); if (RenderingEnabled == 0)
{
RenderingEnabled = 1;
RDLinesTemp = 63; // CHECKME
}
}
else
{
ResetRenderingState();
RenderingEnabled = 0;
}
} }
@ -2458,12 +2477,16 @@ bool YSort(Polygon* a, Polygon* b)
void GPU3D::VBlank() noexcept void GPU3D::VBlank() noexcept
{ {
if (RenderingEnabled)
RDLines = RDLinesTemp;
if (GeometryEnabled) if (GeometryEnabled)
{ {
if (RenderingEnabled) if (RenderingEnabled >= 3)
{ {
if (FlushRequest) if (FlushRequest)
{ {
swap:
if (NumPolygons) if (NumPolygons)
{ {
// separate translucent polygons from opaque ones // separate translucent polygons from opaque ones
@ -2517,6 +2540,15 @@ void GPU3D::VBlank() noexcept
RenderClearAttr1 = ClearAttr1; RenderClearAttr1 = ClearAttr1;
RenderClearAttr2 = ClearAttr2; RenderClearAttr2 = ClearAttr2;
} }
else if (RenderingEnabled != 0)
{
if (FlushRequest)
{
RenderingEnabled++;
if (RenderingEnabled >= 3)
goto swap;
}
}
if (FlushRequest) if (FlushRequest)
{ {
@ -2545,6 +2577,10 @@ void GPU3D::SetRenderXPos(u16 xpos) noexcept
RenderXPos = xpos & 0x01FF; RenderXPos = xpos & 0x01FF;
} }
// Forwards a scanline sync request for 'line' to the currently selected renderer.
// Renderer3D::ScanlineSync is an empty virtual by default, so this does nothing
// for renderers that do not override it.
void GPU3D::ScanlineSync(int line) noexcept
{
CurrentRenderer->ScanlineSync(line);
}
u32* GPU3D::GetLine(int line) noexcept u32* GPU3D::GetLine(int line) noexcept
{ {
@ -2672,7 +2708,7 @@ u16 GPU3D::Read16(u32 addr) noexcept
return DispCnt; return DispCnt;
case 0x04000320: case 0x04000320:
return 46; // TODO, eventually return RDLines;
case 0x04000600: case 0x04000600:
{ {
@ -2716,7 +2752,7 @@ u32 GPU3D::Read32(u32 addr) noexcept
return DispCnt; return DispCnt;
case 0x04000320: case 0x04000320:
return 46; // TODO, eventually return RDLines;
case 0x04000600: case 0x04000600:
{ {

View File

@ -25,10 +25,12 @@
#include "Savestate.h" #include "Savestate.h"
#include "FIFO.h" #include "FIFO.h"
namespace melonDS namespace melonDS
{ {
class GPU; class GPU;
struct Vertex struct Vertex
{ {
s32 Position[4]; s32 Position[4];
@ -112,6 +114,7 @@ public:
void SetRenderXPos(u16 xpos) noexcept; void SetRenderXPos(u16 xpos) noexcept;
[[nodiscard]] u16 GetRenderXPos() const noexcept { return RenderXPos; } [[nodiscard]] u16 GetRenderXPos() const noexcept { return RenderXPos; }
void ScanlineSync(int line) noexcept;
u32* GetLine(int line) noexcept; u32* GetLine(int line) noexcept;
void WriteToGXFIFO(u32 val) noexcept; void WriteToGXFIFO(u32 val) noexcept;
@ -241,9 +244,18 @@ public:
u32 TotalParams = 0; u32 TotalParams = 0;
bool GeometryEnabled = false; bool GeometryEnabled = false;
bool RenderingEnabled = false; // 0 = powered off
// 1 = powered on, inactive
// 2 = one swap buffers, inactive
// 3 = two swap buffers, active;
u8 RenderingEnabled = 0;
u32 DispCnt = 0; u32 DispCnt = 0;
u16 UnderflowFlagVCount = 0;
u8 RDLines = 0;
u8 RDLinesTemp = 0;
u8 AlphaRefVal = 0; u8 AlphaRefVal = 0;
u8 AlphaRef = 0; u8 AlphaRef = 0;
@ -329,6 +341,69 @@ public:
u32 ScrolledLine[256]; // not part of the hardware state, don't serialize u32 ScrolledLine[256]; // not part of the hardware state, don't serialize
}; };
// Rasterization Timing Constants
// TimingFrac is the common scale factor applied to the timing values below;
// raise it to add a fractional component if pixels is not enough precision.
static constexpr int TimingFrac = 1;
// GPU 2D Read Timings: For Emulating Buffer Read/Write Race Conditions
static constexpr int DelayBetweenReads = 809 * TimingFrac;
static constexpr int ScanlineReadSpeed = 256 * TimingFrac;
static constexpr int ScanlineReadInc = DelayBetweenReads + ScanlineReadSpeed; // step from one scanline read time to the next (see SLRead)
static constexpr int InitGPU2DTimeout = (51875+565) * TimingFrac; // 51618? 51874? 52128? | when it finishes reading the first scanline.
static constexpr int FrameLength = ScanlineReadInc * 263; // how long the entire frame is (263 read increments). TODO: Verify if we actually need this?
// compile-time list of scanline read times
// these *should* always occur at the same point in each frame, so it shouldn't matter if we make them fixed
static constexpr std::array<u32, 192> SLRead = []() constexpr {
std::array<u32, 192> readtime {};
for (int i = 0, time = InitGPU2DTimeout; i < 192; i++, time += ScanlineReadInc)
{
readtime[i] = time;
}
return readtime;
}();
static constexpr int PreReadCutoff = 565; // time before a read that a scanline is cutoff.

// The point at which rdlines decrements for each scanline: 39 before that scanline's
// entry in SLRead, and one more than that on even-numbered scanlines.
// Not sure why it's different...?
static constexpr std::array<u32, 192> RDDecrement = []() constexpr {
    std::array<u32, 192> table {};
    for (int line = 0; line < 192; line++)
    {
        const int evenadjust = (line % 2 == 0) ? 1 : 0;
        table[line] = SLRead[line] - 39 - evenadjust;
    }
    return table;
}();
// GPU 3D Rasterization Timings: For Emulating Scanline Timeout
static constexpr int FinalPassLen = 500 * TimingFrac; // 496 (might technically be 500?) | the next scanline cannot begin while a scanline's final pass is in progress
// (can be interpreted as the minimum amount of cycles for the next scanline
// pair to start after the previous pair began) (related to final pass?)
static constexpr int ScanlinePushDelay = 242 * TimingFrac;
static constexpr int EMGlitchThreshhold = 502 * TimingFrac; // The threshold for the edge marking glitch behavior to change.
static constexpr int EMFixNum = 571 * TimingFrac; // Arbitrary value added to fix edge marking glitch, not sure why it's needed?
// GPU 3D Rasterization Timings II: For Tracking Timing Behaviors
//static constexpr int FirstPolyScanline = 0 * TimingFrac;
static constexpr int PerPolyScanline = 12 * TimingFrac; // 12 | The basic timing cost for polygons. Applies per polygon per scanline.
static constexpr int PerPixelTiming = 1 * TimingFrac; // 1 | timing cost of one pixel.
// NOTE(review): SoftRenderer::DoTimingsPixels adds the raw pixel count and does not reference PerPixelTiming; confirm before changing TimingFrac.
static constexpr int NumFreePixels = 4; // 4 | First 4 pixels in a polygon scanline are free (for some reason). A pixel count, so not scaled by TimingFrac.
static constexpr int MinToStartPoly = 2 * TimingFrac; // 1 | if there aren't 2 (why two?) cycles remaining after the polygon timing penalty,
// do not bother rendering the polygon (CHECKME: I don't think this should decrement timings by anything?)
// NOTE(review): the leading "1" in the note above disagrees with the actual value of 2.
static constexpr int EmptyPolyScanline = 4 * TimingFrac; // 4 | the ignored "empty" bottom-most scanline of a polygon
// which shouldn't be rendered for some reason has timing characteristics.
// GPU 3D Rasterization Timings III, For First Polygon "Pre-Calc" Timings
// should be added before other timings, as these are "async" pre-calcs of polygon attributes
static constexpr int FirstPolyDelay = 4 * TimingFrac; // 4 | Min amount of cycles to begin a scanline? (minimum time it takes to init the first polygon?)
// (Amount of time before the end of the cycle a scanline must abort?)
class Renderer3D class Renderer3D
{ {
public: public:
@ -349,6 +424,7 @@ public:
virtual void RenderFrame(GPU& gpu) = 0; virtual void RenderFrame(GPU& gpu) = 0;
virtual void RestartFrame(GPU& gpu) {}; virtual void RestartFrame(GPU& gpu) {};
virtual u32* GetLine(int line) = 0; virtual u32* GetLine(int line) = 0;
virtual void ScanlineSync(int line) {};
virtual void Blit(const GPU& gpu) {}; virtual void Blit(const GPU& gpu) {};
virtual void SetupAccelFrame() {} virtual void SetupAccelFrame() {}

View File

@ -19,6 +19,7 @@
#include "GPU3D_Soft.h" #include "GPU3D_Soft.h"
#include <algorithm> #include <algorithm>
#include <initializer_list>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "NDS.h" #include "NDS.h"
@ -138,6 +139,108 @@ void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept
} }
} }
bool SoftRenderer::DoTimings(s32 cycles, s32* timingcounter)
{
    // Charge 'cycles' to the counter. The charge is kept even when it does not fit.
    *timingcounter += cycles;

    // true while RasterTiming plus the counter is still within ScanlineTimeout,
    // false once the scanline has run out of time.
    const bool intime = (RasterTiming + *timingcounter) <= ScanlineTimeout;
    return intime;
}
bool SoftRenderer::CheckTimings(s32 cycles, s32* timingcounter)
{
    // Query only (the counter is not modified): are there still 'cycles' cycles
    // left before the scanline timeout?
    const auto budget = ScanlineTimeout - cycles;
    return (RasterTiming + *timingcounter) <= budget;
}
u32 SoftRenderer::DoTimingsPixels(s32 pixels, s32* timingcounter)
{
    // Charges a span of 'pixels' pixels to the timing counter and returns how much of
    // the span does not fit before the scanline timeout (0 when all of it fits).

    // The first NumFreePixels pixels of a polygon scanline don't count towards timings
    // (for some reason?), so a span that short leaves the counter untouched.
    if (pixels <= NumFreePixels) return 0;

    const s32 billable = pixels - NumFreePixels;
    *timingcounter += billable;

    // How far past the timeout the counter now is (zero or negative when it fits).
    const s32 overshoot = -(ScanlineTimeout - (RasterTiming + *timingcounter));
    if (overshoot <= 0) return 0;

    // Take the part that does not fit back off the counter and report it,
    // so the caller can shorten the span by that amount.
    *timingcounter -= overshoot;
    return overshoot;
}
void SoftRenderer::FindFirstPolyDoTimings(int npolys, s32 y, int* firstpolyeven, int* firstpolyodd, s32* timingcountereven, s32*timingcounterodd)
{
// TODO: actually figure this out
// The First Polygon in each scanline pair has some additional timing penalties (presumably due to pipelining of the rasterizer)
bool fixeddelay = false;
bool perslope = false;
bool etc = false;
for (*firstpolyeven = 0; *firstpolyeven < npolys; (*firstpolyeven)++)
{
RendererPolygon* rp = &PolygonList[*firstpolyeven];
Polygon* polygon = rp->PolyData;
if (y >= polygon->YTop && y <= polygon->YBottom)
{
fixeddelay = true;
break;
/*if (y == polygon->YBottom) break;
if (y == polygon->YTop) {perslope = true; break;}
else if ((y == polygon->Vertices[rp->NextVL]->FinalPosition[1] || y == polygon->Vertices[rp->CurVL]->FinalPosition[1]) ||
(y == polygon->Vertices[rp->NextVR]->FinalPosition[1] || y == polygon->Vertices[rp->CurVR]->FinalPosition[1]))
{
perslope = true;
}
else etc = true;
break;*/
}
}
y++;
for (*firstpolyodd = 0; *firstpolyodd < npolys; (*firstpolyodd)++)
{
RendererPolygon* rp = &PolygonList[*firstpolyodd];
Polygon* polygon = rp->PolyData;
if (y >= polygon->YTop && y <= polygon->YBottom)
{
fixeddelay = true;
break;
/*if (y == polygon->YBottom) break;
if (y == polygon->YTop) {perslope = true; break;}
else if ((y == polygon->Vertices[rp->NextVL]->FinalPosition[1] || y == polygon->Vertices[rp->CurVL]->FinalPosition[1]) ||
(y == polygon->Vertices[rp->NextVR]->FinalPosition[1] || y == polygon->Vertices[rp->CurVR]->FinalPosition[1]))
{
perslope = true;
}
else etc = true;
break;*/
}
}
*timingcountereven = fixeddelay ? FirstPolyDelay : 0;// + perslope*FirstPerSlope + etc*2;
*timingcounterodd = fixeddelay ? FirstPolyDelay : 0;// + perslope*FirstPerSlope + etc*2;
/*if (!perslope)
{
*timingcountereven += etc*2;// + perslope*FirstPerSlope + etc*2;
*timingcounterodd += etc*2;// + perslope*FirstPerSlope + etc*2;
}
else
{
*timingcountereven += perslope*FirstPerSlope;// + perslope*FirstPerSlope + etc*2;
*timingcounterodd += perslope*FirstPerSlope;// + perslope*FirstPerSlope + etc*2;
}*/
}
void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const
{ {
u32 vramaddr = (texparam & 0xFFFF) << 3; u32 vramaddr = (texparam & 0xFFFF) << 3;
@ -705,7 +808,31 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
} }
} }
void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y) void SoftRenderer::Step(RendererPolygon* rp)
{
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
}
void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y)
{
    // Switches the polygon's left and/or right edge to the next one once scanline y
    // has reached the Y position of that side's next vertex.
    const Polygon* poly = rp->PolyData;

    // nothing to set up when the polygon's top and bottom Y are the same
    if (poly->YTop == poly->YBottom) return;

    // left side: skipped once the current left vertex is already the bottom vertex
    const bool leftreached = y >= poly->Vertices[rp->NextVL]->FinalPosition[1];
    if (leftreached && rp->CurVL != poly->VBottom)
        SetupPolygonLeftEdge(rp, y);

    // right side: same rule, evaluated after the left side has been handled
    const bool rightreached = y >= poly->Vertices[rp->NextVR]->FinalPosition[1];
    if (rightreached && rp->CurVR != poly->VBottom)
        SetupPolygonRightEdge(rp, y);
}
bool SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, s32* timingcounter)
{ {
Polygon* polygon = rp->PolyData; Polygon* polygon = rp->PolyData;
@ -728,18 +855,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
PrevIsShadowMask = true; PrevIsShadowMask = true;
if (polygon->YTop != polygon->YBottom) CheckSlope(rp, y);
{
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
{
SetupPolygonLeftEdge(rp, y);
}
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
{
SetupPolygonRightEdge(rp, y);
}
}
Vertex *vlcur, *vlnext, *vrcur, *vrnext; Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend; s32 xstart, xend;
@ -748,6 +864,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
s32 l_edgecov, r_edgecov; s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start; Interpolator<1>* interp_start;
Interpolator<1>* interp_end; Interpolator<1>* interp_end;
bool abortscanline; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL; xstart = rp->XL;
xend = rp->XR; xend = rp->XR;
@ -831,7 +948,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// similarly, we can perform alpha test early (checkme) // similarly, we can perform alpha test early (checkme)
if (wireframe) polyalpha = 31; if (wireframe) polyalpha = 31;
if (polyalpha <= gpu3d.RenderAlphaRef) return; if (polyalpha <= gpu3d.RenderAlphaRef) return false; // TODO: check how this impacts timings?
// in wireframe mode, there are special rules for equal Z (TODO) // in wireframe mode, there are special rules for equal Z (TODO)
@ -841,25 +958,42 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
int edge; int edge;
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr); xend += 1;
Interpolator<0> interpX(xstart, xend, wl, wr);
if (x < 0) x = 0; if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
// determine if the span can be rendered within the time allotted to the scanline
s32 diff = DoTimingsPixels(xend-x, timingcounter);
if (diff != 0)
{
xend -= diff;
r_edgelen -= diff;
abortscanline = true;
}
else abortscanline = false;
// we cap it to 256 *after* counting the cycles, because yes, it tries to render oob pixels.
if (xend > 256)
{
r_edgelen += 256 - xend;
xend = 256;
}
// for shadow masks: set stencil bits where the depth test fails. // for shadow masks: set stencil bits where the depth test fails.
// draw nothing. // draw nothing.
// part 1: left edge // part 1: left edge
edge = yedge | 0x1; edge = yedge | 0x1;
xlimit = xstart+l_edgelen; xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (!l_filledge) x = xlimit; if (!l_filledge) x = xlimit;
else else
for (; x < xlimit; x++) for (; x < xlimit; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x); interpX.SetX(x);
@ -879,13 +1013,12 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 2: polygon inside // part 2: polygon inside
edge = yedge; edge = yedge;
xlimit = xend-r_edgelen+1; xlimit = xend-r_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (wireframe && !edge) x = std::max(x, xlimit); if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++) else for (; x < xlimit; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x); interpX.SetX(x);
@ -905,13 +1038,12 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 3: right edge // part 3: right edge
edge = yedge | 0x2; edge = yedge | 0x2;
xlimit = xend+1; xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (r_filledge) if (r_filledge)
for (; x < xlimit; x++) for (; x < xlimit; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x); interpX.SetX(x);
@ -929,14 +1061,13 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
} }
} }
rp->XL = rp->SlopeL.Step(); Step(rp);
rp->XR = rp->SlopeR.Step(); return abortscanline;
} }
void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y) bool SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, s32* timingcounter)
{ {
Polygon* polygon = rp->PolyData; Polygon* polygon = rp->PolyData;
u32 polyattr = (polygon->Attr & 0x3F008000); u32 polyattr = (polygon->Attr & 0x3F008000);
if (!polygon->FacingView) polyattr |= (1<<4); if (!polygon->FacingView) polyattr |= (1<<4);
@ -953,18 +1084,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
PrevIsShadowMask = false; PrevIsShadowMask = false;
if (polygon->YTop != polygon->YBottom) CheckSlope(rp, y);
{
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
{
SetupPolygonLeftEdge(rp, y);
}
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
{
SetupPolygonRightEdge(rp, y);
}
}
Vertex *vlcur, *vlnext, *vrcur, *vrnext; Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend; s32 xstart, xend;
@ -973,6 +1093,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
s32 l_edgecov, r_edgecov; s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start; Interpolator<1>* interp_start;
Interpolator<1>* interp_end; Interpolator<1>* interp_end;
bool abortscanline; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL; xstart = rp->XL;
xend = rp->XR; xend = rp->XR;
@ -1091,18 +1212,35 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
int edge; int edge;
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr); xend += 1;
Interpolator<0> interpX(xstart, xend, wl, wr);
if (x < 0) x = 0; if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
s32 xcov = 0; s32 xcov = 0;
// determine if the span can be rendered within the time allotted to the scanline
s32 diff = DoTimingsPixels(xend-x, timingcounter);
if (diff != 0)
{
xend -= diff;
r_edgelen -= diff;
abortscanline = true;
}
else abortscanline = false;
// we cap it to 256 *after* counting the cycles, because yes, it tries to render oob pixels.
if (xend > 256)
{
r_edgelen += 256 - xend;
xend = 256;
}
// part 1: left edge // part 1: left edge
edge = yedge | 0x1; edge = yedge | 0x1;
xlimit = xstart+l_edgelen; xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (l_edgecov & (1<<31)) if (l_edgecov & (1<<31))
{ {
xcov = (l_edgecov >> 12) & 0x3FF; xcov = (l_edgecov >> 12) & 0x3FF;
@ -1110,10 +1248,9 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
} }
if (!l_filledge) x = xlimit; if (!l_filledge) x = xlimit;
else else for (; x < xlimit; x++)
for (; x < xlimit; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows // check stencil buffer for shadows
@ -1201,15 +1338,13 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 2: polygon inside // part 2: polygon inside
edge = yedge; edge = yedge;
xlimit = xend-r_edgelen+1; xlimit = xend-r_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (wireframe && !edge) x = std::max(x, xlimit); if (wireframe && !edge) x = std::max(x, xlimit);
else else for (; x < xlimit; x++)
for (; x < xlimit; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows // check stencil buffer for shadows
@ -1290,8 +1425,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 3: right edge // part 3: right edge
edge = yedge | 0x2; edge = yedge | 0x2;
xlimit = xend+1; xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (r_edgecov & (1<<31)) if (r_edgecov & (1<<31))
{ {
xcov = (r_edgecov >> 12) & 0x3FF; xcov = (r_edgecov >> 12) & 0x3FF;
@ -1301,7 +1435,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
if (r_filledge) if (r_filledge)
for (; x < xlimit; x++) for (; x < xlimit; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows // check stencil buffer for shadows
@ -1386,24 +1520,36 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow);
} }
} }
Step(rp);
rp->XL = rp->SlopeL.Step(); return abortscanline;
rp->XR = rp->SlopeR.Step();
} }
void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int firstpoly, int npolys, s32* timingcounter)
{ {
for (int i = 0; i < npolys; i++) bool abort = false;
for (; firstpoly < npolys; firstpoly++)
{ {
RendererPolygon* rp = &PolygonList[i]; RendererPolygon* rp = &PolygonList[firstpoly];
Polygon* polygon = rp->PolyData; Polygon* polygon = rp->PolyData;
if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop))) if (y == polygon->YBottom && y != polygon->YTop)
{ {
if (polygon->IsShadowMask) if (!abort) abort = !DoTimings(EmptyPolyScanline, timingcounter);
RenderShadowMaskScanline(gpu.GPU3D, rp, y); }
else if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
{
if (!abort) abort = (!DoTimings(PerPolyScanline, timingcounter)
|| !CheckTimings(MinToStartPoly, timingcounter));
if (abort)
{
CheckSlope(rp, y);
Step(rp);
}
else if (polygon->IsShadowMask)
abort = RenderShadowMaskScanline(gpu.GPU3D, rp, y, timingcounter);
else else
RenderPolygonScanline(gpu, rp, y); abort = RenderPolygonScanline(gpu, rp, y, timingcounter);
} }
} }
} }
@ -1447,7 +1593,27 @@ u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const
return density; return density;
} }
void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) bool SoftRenderer::CheckEdgeMarkingPixel(u32 polyid, u32 z, u32 pixeladdr)
{
if ((polyid != AttrBuffer[pixeladdr] >> 24) && (z < DepthBuffer[pixeladdr])) return true;
else return false;
}
bool SoftRenderer::CheckEdgeMarkingClearPlane(const GPU3D& gpu3d, u32 polyid, u32 z)
{
// for some reason it never checks against the bitmap clear plane?
if (polyid != gpu3d.RenderClearAttr1>>24)
{
u32 clearz = ((gpu3d.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
if (z < clearz) return true;
else return false;
}
else return false;
}
template <bool push>
void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y, bool checkprev, bool checknext)
{ {
// to consider: // to consider:
// clearing all polygon fog flags if the master flag isn't set? // clearing all polygon fog flags if the master flag isn't set?
@ -1460,7 +1626,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
u32 attr = AttrBuffer[pixeladdr]; u32 attr = AttrBuffer[pixeladdr];
if (!(attr & 0xF)) continue; if (!(attr & 0xF)) continue;
@ -1468,11 +1634,45 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
u32 polyid = attr >> 24; // opaque polygon IDs are used for edgemarking u32 polyid = attr >> 24; // opaque polygon IDs are used for edgemarking
u32 z = DepthBuffer[pixeladdr]; u32 z = DepthBuffer[pixeladdr];
if (((polyid != (AttrBuffer[pixeladdr-1] >> 24)) && (z < DepthBuffer[pixeladdr-1])) || // check the pixel to the left
((polyid != (AttrBuffer[pixeladdr+1] >> 24)) && (z < DepthBuffer[pixeladdr+1])) || if (x == 0)
((polyid != (AttrBuffer[pixeladdr-ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr-ScanlineWidth])) ||
((polyid != (AttrBuffer[pixeladdr+ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr+ScanlineWidth])))
{ {
// edge marking bug emulation
if (checkprev ? CheckEdgeMarkingClearPlane(gpu3d, polyid, z) : // check against the clear plane
CheckEdgeMarkingPixel(polyid, z, pixeladdr-1 - ScanlineWidth)) // checks the right edge of the scanline 2 scanlines ago
goto pass;
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr-1)) goto pass; // normal check
// check the pixel to the right
if (x == 255)
{
// edge marking bug emulation
if (checknext ? CheckEdgeMarkingClearPlane(gpu3d, polyid, z) : // check against the clear plane
CheckEdgeMarkingPixel(polyid, z, pixeladdr+1 + ScanlineWidth)) // checks the left edge of the scanline 2 scanlines ahead
goto pass;
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr+1)) goto pass; // normal check
// check the pixel above
if (y == 0)
{
// edge marking bug emulation
if (CheckEdgeMarkingClearPlane(gpu3d, polyid, z)) goto pass; // check against the clear plane
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr-ScanlineWidth)) goto pass; // normal check
// check the pixel below
if (y == 191)
{
// edge marking bug emulation
if (CheckEdgeMarkingClearPlane(gpu3d, polyid, z)) goto pass; // check against the clear plane
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr+ScanlineWidth)) goto pass; // normal check
if (false)
{
pass:
u16 edgecolor = gpu3d.RenderEdgeTable[polyid >> 3]; u16 edgecolor = gpu3d.RenderEdgeTable[polyid >> 3];
u32 edgeR = (edgecolor << 1) & 0x3E; if (edgeR) edgeR++; u32 edgeR = (edgecolor << 1) & 0x3E; if (edgeR) edgeR++;
u32 edgeG = (edgecolor >> 4) & 0x3E; if (edgeG) edgeG++; u32 edgeG = (edgecolor >> 4) & 0x3E; if (edgeG) edgeG++;
@ -1508,7 +1708,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
u32 density, srccolor, srcR, srcG, srcB, srcA; u32 density, srccolor, srcR, srcG, srcB, srcA;
u32 attr = AttrBuffer[pixeladdr]; u32 attr = AttrBuffer[pixeladdr];
@ -1573,7 +1773,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x; u32 pixeladdr = (y*ScanlineWidth) + x;
u32 attr = AttrBuffer[pixeladdr]; u32 attr = AttrBuffer[pixeladdr];
if (!(attr & 0xF)) continue; if (!(attr & 0xF)) continue;
@ -1615,39 +1815,17 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
ColorBuffer[pixeladdr] = topR | (topG << 8) | (topB << 16) | (topA << 24); ColorBuffer[pixeladdr] = topR | (topG << 8) | (topB << 16) | (topA << 24);
} }
} }
if constexpr (push)
{
memcpy(&FinalBuffer[y*ScanlineWidth], &ColorBuffer[y*ScanlineWidth], ScanlineWidth*4);
Platform::Semaphore_Post(Sema_ScanlineCount);
}
} }
void SoftRenderer::ClearBuffers(const GPU& gpu) void SoftRenderer::ClearBuffers(const GPU& gpu)
{ {
u32 clearz = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
u32 polyid = gpu.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID u32 polyid = gpu.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID
// fill screen borders for edge marking
for (int x = 0; x < ScanlineWidth; x++)
{
ColorBuffer[x] = 0;
DepthBuffer[x] = clearz;
AttrBuffer[x] = polyid;
}
for (int x = ScanlineWidth; x < ScanlineWidth*193; x+=ScanlineWidth)
{
ColorBuffer[x] = 0;
DepthBuffer[x] = clearz;
AttrBuffer[x] = polyid;
ColorBuffer[x+257] = 0;
DepthBuffer[x+257] = clearz;
AttrBuffer[x+257] = polyid;
}
for (int x = ScanlineWidth*193; x < ScanlineWidth*194; x++)
{
ColorBuffer[x] = 0;
DepthBuffer[x] = clearz;
AttrBuffer[x] = polyid;
}
// clear the screen // clear the screen
if (gpu.GPU3D.RenderDispCnt & (1<<14)) if (gpu.GPU3D.RenderDispCnt & (1<<14))
@ -1655,7 +1833,7 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
u8 xoff = (gpu.GPU3D.RenderClearAttr2 >> 16) & 0xFF; u8 xoff = (gpu.GPU3D.RenderClearAttr2 >> 16) & 0xFF;
u8 yoff = (gpu.GPU3D.RenderClearAttr2 >> 24) & 0xFF; u8 yoff = (gpu.GPU3D.RenderClearAttr2 >> 24) & 0xFF;
for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) for (int y = 0; y < 192; y++)
{ {
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
@ -1671,7 +1849,7 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF; u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF;
u32 pixeladdr = FirstPixelOffset + y + x; u32 pixeladdr = (y*ScanlineWidth) + x;
ColorBuffer[pixeladdr] = color; ColorBuffer[pixeladdr] = color;
DepthBuffer[pixeladdr] = z; DepthBuffer[pixeladdr] = z;
AttrBuffer[pixeladdr] = polyid | (val3 & 0x8000); AttrBuffer[pixeladdr] = polyid | (val3 & 0x8000);
@ -1684,6 +1862,8 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
} }
else else
{ {
u32 clearz = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
// TODO: confirm color conversion // TODO: confirm color conversion
u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++;
u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++;
@ -1693,11 +1873,11 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
polyid |= (gpu.GPU3D.RenderClearAttr1 & 0x8000); polyid |= (gpu.GPU3D.RenderClearAttr1 & 0x8000);
for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) for (int y = 0; y < 192; y++)
{ {
for (int x = 0; x < 256; x++) for (int x = 0; x < 256; x++)
{ {
u32 pixeladdr = FirstPixelOffset + y + x; u32 pixeladdr = (y*ScanlineWidth) + x;
ColorBuffer[pixeladdr] = color; ColorBuffer[pixeladdr] = color;
DepthBuffer[pixeladdr] = clearz; DepthBuffer[pixeladdr] = clearz;
AttrBuffer[pixeladdr] = polyid; AttrBuffer[pixeladdr] = polyid;
@ -1706,7 +1886,46 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
} }
} }
void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys) #define RDLINES_COUNT_INCREMENT\
/* feels wrong, needs improvement */\
while (RasterTiming >= RDDecrement[nextreadrd])\
{\
slwaitingrd--;\
nextreadrd++;\
/* update rdlines_count register */\
if (gpu.GPU3D.RDLinesTemp > slwaitingrd) gpu.GPU3D.RDLinesTemp = slwaitingrd;\
}
#define SCANLINE_BUFFER_SIM\
/* simulate the process of scanlines being read from the 48 scanline buffer */\
while (scanlineswaiting >= 47 || RasterTiming >= SLRead[nextread])\
{\
if (RasterTiming < SLRead[nextread])\
{\
timespent = SLRead[nextread] - RasterTiming;\
timespent += EMFixNum; /* fixes edge marking bug emulation. not sure why this is needed? */\
RasterTiming = SLRead[nextread];\
}\
scanlineswaiting--;\
nextread++;\
}
#define RENDER_SCANLINES(y)\
/* update sl timeout */\
ScanlineTimeout = SLRead[y-1] - (PreReadCutoff+FinalPassLen);\
\
FindFirstPolyDoTimings(j, y, &firstpolyeven, &firstpolyodd, &rastertimingeven, &rastertimingodd);\
RenderScanline(gpu, y, firstpolyeven, j, &rastertimingeven);\
RenderScanline(gpu, y+1, firstpolyodd, j, &rastertimingodd);\
\
prevtimespent = timespent;\
RasterTiming += timespent = std::max(std::initializer_list<s32> {rastertimingeven, rastertimingodd, FinalPassLen});\
RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12);\
\
/* set the underflow flag if one of the scanlines came within 14 cycles of visible underflow */\
if ((ScanlineTimeout <= RasterTiming) && (gpu.GPU3D.UnderflowFlagVCount == (u16)-1)) gpu.GPU3D.UnderflowFlagVCount = y - (y&1 ? 0 : 1);
void SoftRenderer::RenderPolygonsTiming(GPU& gpu, Polygon** polygons, int npolys)
{ {
int j = 0; int j = 0;
for (int i = 0; i < npolys; i++) for (int i = 0; i < npolys; i++)
@ -1715,25 +1934,83 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg
SetupPolygon(&PolygonList[j++], polygons[i]); SetupPolygon(&PolygonList[j++], polygons[i]);
} }
RenderScanline(gpu, 0, j); // reset scanline trackers
gpu.GPU3D.UnderflowFlagVCount = -1;
gpu.GPU3D.RDLinesTemp = 63;
RasterTiming = 0;
ScanlineTimeout = SLRead[2] - (PreReadCutoff+FinalPassLen+4); // TEMP: should be infinity, but i dont want it to break due to not being set up to handle this properly. //0x7FFFFFFF; // CHECKME: first scanline pair timeout.
s32 rastertimingeven, rastertimingodd; // always init to 0 at the start of a scanline render
s32 scanlineswaiting = 0, slwaitingrd = 0;
s32 nextread = 0, nextreadrd = 0;
u32 timespent, prevtimespent;
int firstpolyeven, firstpolyodd;
for (s32 y = 1; y < 192; y++) FindFirstPolyDoTimings(j, 0, &firstpolyeven, &firstpolyodd, &rastertimingeven, &rastertimingodd);
// scanlines are rendered in pairs of two
RenderScanline(gpu, 0, firstpolyeven, j, &rastertimingeven);
RenderScanline(gpu, 1, firstpolyodd, j, &rastertimingodd);
// it can't proceed to the next scanline unless all others steps are done (both scanlines in the pair, and final pass)
RasterTiming = timespent = std::max(std::initializer_list<s32> {rastertimingeven, rastertimingodd, FinalPassLen});
// 12 cycles at the end of a "timeout" are always used for w/e reason
RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12); // should probably just be += 12 tbh but i'll leave it for now
// if first pair was not delayed past the first read, then later scanlines cannot either
// this allows us to implement a fast path
//if (SLRead[0] - timespent + ScanlinePushDelay >= 256)
{ {
RenderScanline(gpu, y, j); RENDER_SCANLINES(2)
ScanlineFinalPass(gpu.GPU3D, y-1);
if (threaded) scanlineswaiting++;
// Notify the main thread that we're done with a scanline. slwaitingrd++;
Platform::Semaphore_Post(Sema_ScanlineCount);
SCANLINE_BUFFER_SIM
RDLINES_COUNT_INCREMENT
// final pass pairs are the previous scanline pair offset -1 scanline, thus we start with only building one
ScanlineFinalPass<true>(gpu.GPU3D, 0, true, timespent >= EMGlitchThreshhold);
// main loop
for (int y = 4; y < 192; y+=2)
{
RENDER_SCANLINES(y)
scanlineswaiting += 2;
slwaitingrd += 2;
SCANLINE_BUFFER_SIM
RDLINES_COUNT_INCREMENT
ScanlineFinalPass<true>(gpu.GPU3D, y-3, prevtimespent >= EMGlitchThreshhold || y-3 == 1, timespent >= EMGlitchThreshhold);
ScanlineFinalPass<true>(gpu.GPU3D, y-2, prevtimespent >= EMGlitchThreshhold, timespent >= EMGlitchThreshhold);
}
scanlineswaiting += 2;
slwaitingrd += 2;
prevtimespent = timespent;
// emulate read timings one last time, since it shouldn't matter after this
// additionally dont bother tracking rdlines anymore since it shouldn't be able to decrement anymore (CHECKME)
SCANLINE_BUFFER_SIM
// finish the last 3 scanlines
ScanlineFinalPass<true>(gpu.GPU3D, 189, prevtimespent >= EMGlitchThreshhold, timespent >= EMGlitchThreshhold);
ScanlineFinalPass<true>(gpu.GPU3D, 190, prevtimespent >= EMGlitchThreshhold, true);
ScanlineFinalPass<true>(gpu.GPU3D, 191, timespent >= EMGlitchThreshhold, true);
} }
/*else
ScanlineFinalPass(gpu.GPU3D, 191); {
Coming soon^tm to a melonDS near you
if (threaded) }*/
// If this renderer is threaded, notify the main thread that we're done with the frame.
Platform::Semaphore_Post(Sema_ScanlineCount);
} }
#undef RENDER_SCANLINES
#undef SCANLINE_BUFFER_SIM
#undef RDLINES_COUNT_INCREMENT
void SoftRenderer::VCount144(GPU& gpu) void SoftRenderer::VCount144(GPU& gpu)
{ {
if (RenderThreadRunning.load(std::memory_order_relaxed) && !gpu.GPU3D.AbortFrame) if (RenderThreadRunning.load(std::memory_order_relaxed) && !gpu.GPU3D.AbortFrame)
@ -1757,8 +2034,14 @@ void SoftRenderer::RenderFrame(GPU& gpu)
} }
else if (!FrameIdentical) else if (!FrameIdentical)
{ {
//init internal buffer
ClearBuffers(gpu); ClearBuffers(gpu);
RenderPolygons(gpu, false, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
if (gpu.GPU3D.RenderingEnabled >= 3)
{
RenderPolygonsTiming(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
}
else memcpy(FinalBuffer, ColorBuffer, sizeof(FinalBuffer));
} }
} }
@ -1789,8 +2072,18 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
} }
else else
{ {
//init internal buffer
ClearBuffers(gpu); ClearBuffers(gpu);
RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
if (gpu.GPU3D.RenderingEnabled >= 3)
{
RenderPolygonsTiming(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
}
else
{
memcpy(FinalBuffer, ColorBuffer, sizeof(FinalBuffer));
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
}
} }
// Tell the main thread that we're done rendering // Tell the main thread that we're done rendering
@ -1800,19 +2093,23 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
RenderThreadRendering = false; RenderThreadRendering = false;
} }
} }
void SoftRenderer::ScanlineSync(int line)
u32* SoftRenderer::GetLine(int line)
{ {
// only used in accurate mode (timings must be emulated)
if (RenderThreadRunning.load(std::memory_order_relaxed)) if (RenderThreadRunning.load(std::memory_order_relaxed))
{ {
if (line < 192) if (line < 192)
// We need a scanline, so let's wait for the render thread to finish it. {
// (both threads process scanlines from top-to-bottom, // wait for two scanlines here, since scanlines render in pairs.
// so we don't need to wait for a specific row)
Platform::Semaphore_Wait(Sema_ScanlineCount); Platform::Semaphore_Wait(Sema_ScanlineCount);
Platform::Semaphore_Wait(Sema_ScanlineCount);
}
} }
}
return &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset]; u32* SoftRenderer::GetLine(int line)
{
return &FinalBuffer[line*ScanlineWidth];
} }
} }

View File

@ -40,6 +40,7 @@ public:
void RenderFrame(GPU& gpu) override; void RenderFrame(GPU& gpu) override;
void RestartFrame(GPU& gpu) override; void RestartFrame(GPU& gpu) override;
u32* GetLine(int line) override; u32* GetLine(int line) override;
void ScanlineSync(int line) override;
void SetupRenderThread(GPU& gpu); void SetupRenderThread(GPU& gpu);
void EnableRenderThread(); void EnableRenderThread();
@ -445,36 +446,54 @@ private:
}; };
RendererPolygon PolygonList[2048]; RendererPolygon PolygonList[2048];
bool DoTimings(s32 cycles, s32* timingcounter);
bool CheckTimings(s32 cycles, s32* timingcounter);
u32 DoTimingsPixels(s32 pixels, s32* timingcounter);
void FindFirstPolyDoTimings(int npolys, s32 y, int* firstpolyeven, int* firstpolyodd, s32* timingcountereven, s32*timingcounterodd);
void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const;
u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const;
void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const; void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const; void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); void Step(RendererPolygon* rp);
void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); void CheckSlope(RendererPolygon* rp, s32 y);
void RenderScanline(const GPU& gpu, s32 y, int npolys); bool RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, s32* timingcounter);
bool RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, s32* timingcounter);
void RenderScanline(const GPU& gpu, s32 y, int firstpoly, int npolys, s32* timingcounter);
u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const; u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const;
void ScanlineFinalPass(const GPU3D& gpu3d, s32 y); bool CheckEdgeMarkingPixel(u32 polyid, u32 z, u32 pixeladdr);
bool CheckEdgeMarkingClearPlane(const GPU3D& gpu3d, u32 polyid, u32 z);
template <bool push> void ScanlineFinalPass(const GPU3D& gpu3d, s32 y, bool checkprev, bool checknext);
void ClearBuffers(const GPU& gpu); void ClearBuffers(const GPU& gpu);
void RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys); void RenderPolygonsFast(GPU& gpu, Polygon** polygons, int npolys);
void RenderPolygonsTiming(GPU& gpu, Polygon** polygons, int npolys);
void RenderThreadFunc(GPU& gpu); void RenderThreadFunc(GPU& gpu);
// counters for scanline rasterization timings
s32 ScanlineTimeout;
s32 RasterTiming;
// buffer dimensions are 258x194 to add a offscreen 1px border // buffer dimensions are 258x194 to add a offscreen 1px border
// which simplifies edge marking tests // which simplifies edge marking tests
// buffer is duplicated to keep track of the two topmost pixels // buffer is duplicated to keep track of the two topmost pixels
// TODO: check if the hardware can accidentally plot pixels // TODO: check if the hardware can accidentally plot pixels
// offscreen in that border // offscreen in that border
static constexpr int ScanlineWidth = 258; static constexpr int ScanlineWidth = 256;
static constexpr int NumScanlines = 194; static constexpr int NumScanlinesIntBuf = 192;
static constexpr int BufferSize = ScanlineWidth * NumScanlines; //static constexpr int NumScanlinesRD = 48;
static constexpr int FirstPixelOffset = ScanlineWidth + 1; static constexpr int NumScanlinesFinal = 192;
static constexpr int BufferSize = ScanlineWidth * NumScanlinesIntBuf;
//static constexpr int RDBufferSize = ScanlineWidth * NumScanlinesRD;
static constexpr int FinalBufferSize = ScanlineWidth * NumScanlinesFinal;
u32 ColorBuffer[BufferSize * 2]; u32 ColorBuffer[BufferSize * 2];
u32 DepthBuffer[BufferSize * 2]; u32 DepthBuffer[BufferSize * 2];
u32 AttrBuffer[BufferSize * 2]; u32 AttrBuffer[BufferSize * 2];
//u32 RDBuffer[RDBufferSize]; // is this buffer ever initialized by hw before writing to it? what is its initial value? can you transfer 3d framebuffer data between games?
u32 FinalBuffer[FinalBufferSize];
// attribute buffer: // attribute buffer:
// bit0-3: edge flags (left/right/top/bottom) // bit0-3: edge flags (left/right/top/bottom)