Compare commits

...

64 Commits

Author SHA1 Message Date
Jakly
27d4d516c7
Merge 0b85038586 into 7c1d2a64f4 2024-11-12 12:56:34 +01:00
Nadia Holmquist Pedersen
7c1d2a64f4 Set WIN32_LEAN_AND_MEAN, gets rid of the winsock2 warnings and probably
Some checks failed
macOS / ${{ matrix.arch }} (arm64) (push) Has been cancelled
macOS / ${{ matrix.arch }} (x86_64) (push) Has been cancelled
Ubuntu / x86_64 (push) Has been cancelled
Ubuntu / aarch64 (push) Has been cancelled
Windows / build (push) Has been cancelled
macOS / Universal binary (push) Has been cancelled
speeds up compilation a tiny bit

oh and NOMINMAX too for good measure while we're at it
2024-11-11 14:18:05 +01:00
Nadia Holmquist Pedersen
b2f6fab6f4 cmake: use interface include directories properly
and fix an indent I guess
2024-11-11 12:06:12 +01:00
Jaklyy
0b85038586 remove accuracy toggle
doesn't seem necessary
2024-05-09 09:45:36 -04:00
Jaklyy
72ffe6b297 fix bugs with negative viewports
it
it renders out of bounds pixels...........
2024-04-27 12:37:48 -04:00
Jaklyy
9b106d064d im dumb 2024-04-24 10:04:25 -04:00
Jaklyy
bb20a0b1d2 try to avoid some memcpys in fast mode 2024-04-24 08:04:41 -04:00
Jaklyy
635bfa0c29 even more cleanup! 2024-04-24 07:59:38 -04:00
Jaklyy
57e590269f cleanup more misc stuff 2024-04-24 00:27:50 -04:00
Jaklyy
896df08c5c clean up 2024-04-23 22:05:45 -04:00
Jaklyy
1aa86967b5 small fix 2024-04-21 14:49:27 -04:00
Jaklyy
424c5755ea nvm ill just shove it into hblank 2024-04-21 11:53:27 -04:00
Jaklyy
36f555db33 approximate time of dispcnt underflow bit update 2024-04-20 12:13:26 -04:00
Jaklyy
1c24fe03c2 improve when 3d dispcnt underflow flag updates 2024-04-19 13:03:04 -04:00
Jaklyy
520f7a0f3a small optimization 2024-04-19 10:41:01 -04:00
Jaklyy
39a569bd35 Revert "temp hack cuz lazyy"
This reverts commit 668c493bf4.
2024-04-19 08:00:29 -04:00
Jaklyy
663e72fe3a Merge remote-tracking branch 'upstream/master' into RDLines 2024-04-19 06:30:04 -04:00
Jaklyy
668c493bf4 temp hack cuz lazyy 2024-04-19 06:29:46 -04:00
Jaklyy
95faca402a accuracy toggle + some attempt at understanding slopes 2024-04-19 06:16:32 -04:00
Jaklyy
ae1890a8db Merge remote-tracking branch 'upstream/master' into RDLines 2024-04-17 13:04:08 -04:00
Jaklyy
cdc7b01701 fix up a few more things 2024-04-15 10:02:32 -04:00
Jaklyy
a51747b253 fix a bug i introduced, also fix one i didn't 2024-04-05 12:46:57 -04:00
Jaklyy
75956b43c4 better implement when the line count reg is/isnt updated 2024-04-04 19:36:27 -04:00
Jaklyy
e1cbadbe60 attempt at some cleanup 2024-04-03 15:18:25 -04:00
Jaklyy
b32f519c5a more cleanup + "fix" RDLines_Count
fix feels wrong, but i can't prove it either way yet.
2024-03-06 08:42:53 -05:00
Jaklyy
246fa18ab6 return false if underflowed
misc cleanup
2024-03-06 07:41:36 -05:00
Jaklyy
7f73dc35f9 minor cleanup 2024-03-05 21:22:51 -05:00
Jaklyy
52e097d97c Improve(?) edge marking check
fixes a bug
makes the code 200% uglier to look at though
2024-03-05 19:28:20 -05:00
Jaklyy
bbbd56877d minor tweaks to edge marking bug handling
for some reason it does not check against the depth bitmap when enabled?
2024-03-05 17:46:13 -05:00
Jaklyy
56e506ef9a misc cleanup 2024-02-26 12:25:49 -05:00
Jaklyy
9ffa04dfbc approximate rdlines_count; implement underflow flag 2024-02-25 22:41:33 -05:00
Jaklyy
9219a084c4 improve edge marking bug accuracy
also begin groundwork for rdlines_count register emulation
2024-02-25 16:45:22 -05:00
Jaklyy
f2cf447fc1 Merge branch 'rastertiming-mk4' into RDLines 2024-02-24 14:20:31 -05:00
Jaklyy
403674ebf4 Merge remote-tracking branch 'upstream/master' into RDLines 2024-02-24 14:20:25 -05:00
Jaklyy
249687a2ce rework 4: now with proper edge marking bug emulation! 2024-02-24 14:18:45 -05:00
Jaklyy
3256e054fa wip 2024-02-23 09:55:34 -05:00
Jaklyy
ae934021e5 improve scanline timeout slightly i hope
finally touch this again after 2 months
2024-02-13 21:38:53 -05:00
Jaklyy
ba4b4e2263 Merge remote-tracking branch 'upstream/master' into RDLines 2024-02-13 09:56:44 -05:00
Jaklyy
ab90b0aa83 Merge remote-tracking branch 'upstream/master' into RDLines 2023-12-26 02:19:12 -05:00
Jaklyy
4cb2c23ad6 fine linux 2023-12-25 19:39:54 -05:00
Jaklyy
bffc529c04 meh 2023-12-25 19:24:35 -05:00
Jaklyy
4f3b99f5c4 fix another crash + bug w/ scanline delay 2023-12-25 15:26:48 -05:00
Jaklyy
ee3e38aed3 fix bottom scanline bugging out
use a method of tracking progress through rendering that's less prone to me messing it up
2023-12-25 13:10:32 -05:00
Jaklyy
f239d0cf0d fix a crash and scanlines being incorrectly partially read 2023-12-24 20:27:24 -05:00
Jaklyy
fb5b2c299c new feature: crashes 2023-12-24 17:39:33 -05:00
Jaklyy
c05c79321a it works again! 2023-12-23 22:24:09 -05:00
Jaklyy
bf26b6817d partially rendering 2023-12-23 21:26:49 -05:00
Jaklyy
78da2846e6 wip - rewrite 3 - scheduler edition 2023-12-23 00:38:39 -05:00
Jaklyy
a338ef1c8a Merge remote-tracking branch 'upstream/master' into RDLines 2023-12-21 18:14:39 -05:00
Jaklyy
6cee0a7ad7 no idea how that one slipped in 2023-12-20 23:15:07 -05:00
Jaklyy
8cc42490de fix build but also sw renderer crashes now 2023-12-20 21:54:03 -05:00
Jaklyy
4c2e03af53 Merge branch 'master' of https://github.com/melonDS-emu/melonDS into RDLines 2023-12-20 20:55:35 -05:00
Jaklyy
1054011c90 wip 2023-12-19 22:52:54 -05:00
Jaklyy
2217a34d39 misc improvements 2023-12-14 23:00:12 -05:00
Jaklyy
a46316d71f improved timings for the first 50 scanlines 2023-12-14 15:18:39 -05:00
Jaklyy
24eecec50f implement first draft of improved timing structure 2023-12-12 00:01:26 -05:00
Jaklyy
0d6a8e0fb9 ok this one actually works 2023-12-10 19:22:30 -05:00
Jaklyy
2bf033e0bc optimize per pixel timing counting 2023-12-10 18:51:00 -05:00
Jaklyy
785fab024f dont use templates
bigger code <<< slower code
2023-12-10 13:03:54 -05:00
Jaklyy
63a39b130e refactor framebuffers to be more similar to hw
allows for emulation of niche scanline glitches
2023-12-10 12:18:46 -05:00
Jaklyy
c45d3320d0 tentative timings for "empty" polys scanlines, fix swapped polys breaking 2023-12-09 15:56:36 -05:00
Jaklyy
92ca04e479 i forgot i changed those-- oops
no wonder edge marking was broken
how did this even still work
2023-12-09 10:58:23 -05:00
Jaklyy
447cd50422 holds true when slopes are vertical and y > 50 2023-12-09 08:54:13 -05:00
Jaklyy
8e2c9cbff6 wip initial draft 2023-12-09 08:54:13 -05:00
8 changed files with 596 additions and 161 deletions

View File

@ -127,6 +127,8 @@ if (ENABLE_JIT)
endif()
endif()
target_include_directories(core INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")
set(MELONDS_VERSION_SUFFIX "$ENV{MELONDS_VERSION_SUFFIX}" CACHE STRING "Suffix to add to displayed melonDS version")
option(MELONDS_EMBED_BUILD_INFO "Embed detailed build info into the binary" OFF)
set(MELONDS_GIT_BRANCH "$ENV{MELONDS_GIT_BRANCH}" CACHE STRING "The Git branch used for this build")
@ -178,6 +180,7 @@ endif()
if (WIN32)
target_link_libraries(core PRIVATE ole32 comctl32 wsock32 ws2_32)
target_compile_definitions(core PUBLIC WIN32_LEAN_AND_MEAN NOMINMAX)
elseif(NOT APPLE AND NOT HAIKU)
check_library_exists(rt shm_open "" NEED_LIBRT)
if (NEED_LIBRT)

View File

@ -879,6 +879,11 @@ void GPU::StartHBlank(u32 line) noexcept
DispStat[0] |= (1<<1);
DispStat[1] |= (1<<1);
// TODO: not quite the correct update time, but... close enough i guess?
int scanline = (VCount == 262 ? 0 : (line+1));
if (!(scanline & 1)) GPU3D.ScanlineSync(scanline);
if (GPU3D.UnderflowFlagVCount == scanline) GPU3D.DispCnt |= (1<<12);
if (VCount < 192)
{
// draw
@ -1013,7 +1018,7 @@ void GPU::StartScanline(u32 line) noexcept
{
if (VCount == 192)
{
// in reality rendering already finishes at line 144
// in reality rendering already finishes at line 144 (can take up to ~191 depending on load)
// and games might already start to modify texture memory.
// That doesn't matter for us because we cache the entire
// texture memory anyway and only update it before the start

View File

@ -183,6 +183,8 @@ void GPU3D::ResetRenderingState() noexcept
RenderClearAttr1 = 0x3F000000;
RenderClearAttr2 = 0x00007FFF;
RenderFrameIdentical = false;
}
void GPU3D::Reset() noexcept
@ -236,7 +238,7 @@ void GPU3D::Reset() noexcept
TotalParams = 0;
GeometryEnabled = false;
RenderingEnabled = false;
RenderingEnabled = 0;
DispCnt = 0;
AlphaRefVal = 0;
@ -255,6 +257,9 @@ void GPU3D::Reset() noexcept
ResetRenderingState();
UnderflowFlagVCount = -1;
RDLines = 63;
AbortFrame = false;
Timestamp = 0;
@ -549,12 +554,16 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Bool32(&AbortFrame);
file->Bool32(&GeometryEnabled);
file->Bool32(&RenderingEnabled);
file->Var8(&RenderingEnabled);
file->Var32(&PolygonMode);
file->Var32(&PolygonAttr);
file->Var32(&CurPolygonAttr);
file->Var32(&TexParam);
file->Var32(&TexPalette);
file->Var8(&RDLines);
file->Var8(&RDLinesTemp);
RenderFrameIdentical = false;
if (softRenderer && softRenderer->IsThreaded())
{
@ -567,9 +576,19 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
void GPU3D::SetEnabled(bool geometry, bool rendering) noexcept
{
GeometryEnabled = geometry;
RenderingEnabled = rendering;
if (!rendering) ResetRenderingState();
if (rendering)
{
if (RenderingEnabled == 0)
{
RenderingEnabled = 1;
RDLinesTemp = 63; // CHECKME
}
}
else
{
ResetRenderingState();
RenderingEnabled = 0;
}
}
@ -2458,12 +2477,16 @@ bool YSort(Polygon* a, Polygon* b)
void GPU3D::VBlank() noexcept
{
if (RenderingEnabled)
RDLines = RDLinesTemp;
if (GeometryEnabled)
{
if (RenderingEnabled)
if (RenderingEnabled >= 3)
{
if (FlushRequest)
{
swap:
if (NumPolygons)
{
// separate translucent polygons from opaque ones
@ -2517,6 +2540,15 @@ void GPU3D::VBlank() noexcept
RenderClearAttr1 = ClearAttr1;
RenderClearAttr2 = ClearAttr2;
}
else if (RenderingEnabled != 0)
{
if (FlushRequest)
{
RenderingEnabled++;
if (RenderingEnabled >= 3)
goto swap;
}
}
if (FlushRequest)
{
@ -2545,6 +2577,10 @@ void GPU3D::SetRenderXPos(u16 xpos) noexcept
RenderXPos = xpos & 0x01FF;
}
// Forwards the scanline sync request for `line` to the currently selected renderer.
void GPU3D::ScanlineSync(int line) noexcept
{
CurrentRenderer->ScanlineSync(line);
}
u32* GPU3D::GetLine(int line) noexcept
{
@ -2672,7 +2708,7 @@ u16 GPU3D::Read16(u32 addr) noexcept
return DispCnt;
case 0x04000320:
return 46; // TODO, eventually
return RDLines;
case 0x04000600:
{
@ -2716,7 +2752,7 @@ u32 GPU3D::Read32(u32 addr) noexcept
return DispCnt;
case 0x04000320:
return 46; // TODO, eventually
return RDLines;
case 0x04000600:
{

View File

@ -25,10 +25,12 @@
#include "Savestate.h"
#include "FIFO.h"
namespace melonDS
{
class GPU;
struct Vertex
{
s32 Position[4];
@ -112,6 +114,7 @@ public:
void SetRenderXPos(u16 xpos) noexcept;
[[nodiscard]] u16 GetRenderXPos() const noexcept { return RenderXPos; }
void ScanlineSync(int line) noexcept;
u32* GetLine(int line) noexcept;
void WriteToGXFIFO(u32 val) noexcept;
@ -241,9 +244,18 @@ public:
u32 TotalParams = 0;
bool GeometryEnabled = false;
bool RenderingEnabled = false;
// 0 = powered off
// 1 = powered on, inactive
// 2 = one swap buffers, inactive
// 3 = two swap buffers, active;
u8 RenderingEnabled = 0;
u32 DispCnt = 0;
u16 UnderflowFlagVCount = 0;
u8 RDLines = 0;
u8 RDLinesTemp = 0;
u8 AlphaRefVal = 0;
u8 AlphaRef = 0;
@ -329,6 +341,69 @@ public:
u32 ScrolledLine[256]; // not part of the hardware state, don't serialize
};
// Rasterization Timing Constants
static constexpr int TimingFrac = 1; // add a fractional component if pixels is not enough precision
// GPU 2D Read Timings: For Emulating Buffer Read/Write Race Conditions
static constexpr int DelayBetweenReads = 809 * TimingFrac;
static constexpr int ScanlineReadSpeed = 256 * TimingFrac;
static constexpr int ScanlineReadInc = DelayBetweenReads + ScanlineReadSpeed;
static constexpr int InitGPU2DTimeout = (51875+565) * TimingFrac; // 51618? 51874? 52128? | when it finishes reading the first scanline.
static constexpr int FrameLength = ScanlineReadInc * 263; // how long the entire frame is. TODO: Verify if we actually need this?
// Compile-time table of scanline read times, one entry per visible scanline.
// Entry 0 is InitGPU2DTimeout and each following entry is ScanlineReadInc later.
// These *should* always occur at the same point in each frame, so fixed values are fine.
static constexpr std::array<u32, 192> SLRead = []() constexpr {
    std::array<u32, 192> table {};
    for (int line = 0; line < 192; line++)
        table[line] = InitGPU2DTimeout + line * ScanlineReadInc;
    return table;
}();
static constexpr int PreReadCutoff = 565; // time before a read that a scanline is cutoff.
// The point at which rdlines decrements for each scanline: 39 before the
// scanline's read time, and one more than that on even-numbered scanlines.
// Not sure why it differs from the read time...?
static constexpr std::array<u32, 192> RDDecrement = []() constexpr {
    std::array<u32, 192> table {};
    for (int line = 0; line < 192; line++)
    {
        const int evenAdjust = (line % 2 == 0) ? 1 : 0;
        table[line] = SLRead[line] - 39 - evenAdjust;
    }
    return table;
}();
// GPU 3D Rasterization Timings: For Emulating Scanline Timeout
static constexpr int FinalPassLen = 500 * TimingFrac; // 496 (might technically be 500?) | the next scanline cannot begin while a scanline's final pass is in progress
// (can be interpreted as the minimum amount of cycles for the next scanline
// pair to start after the previous pair began) (related to final pass?)
static constexpr int ScanlinePushDelay = 242 * TimingFrac;
static constexpr int EMGlitchThreshhold = 502 * TimingFrac; // The threshold for the edge marking glitch behavior to change.
static constexpr int EMFixNum = 571 * TimingFrac; // Arbitrary value added to fix edge marking glitch, not sure why it's needed?
// GPU 3D Rasterization Timings II: For Tracking Timing Behaviors
//static constexpr int FirstPolyScanline = 0 * TimingFrac;
static constexpr int PerPolyScanline = 12 * TimingFrac; // 12 | The basic timing cost for polygons. Applies per polygon per scanline.
static constexpr int PerPixelTiming = 1 * TimingFrac; // 1 | 1 pixel = 1 pixel
static constexpr int NumFreePixels = 4; // 4 | First 4 pixels in a polygon scanline are free (for some reason)
static constexpr int MinToStartPoly = 2 * TimingFrac; // 1 | if there aren't 2 (why two?) cycles remaining after the polygon timing penalty,
// do not bother rendering the polygon (CHECKME: I dont think this should decrement timings by anything?)
static constexpr int EmptyPolyScanline = 4 * TimingFrac; // 4 | the ignored "empty" bottom-most scanline of a polygon
// which shouldn't be rendered for some reason has timing characteristics.
// GPU 3D Rasterization Timings III, For First Polygon "Pre-Calc" Timings
// should be added before other timings, as these are "async" pre-calcs of polygon attributes
static constexpr int FirstPolyDelay = 4 * TimingFrac; // 4 | Min amount of cycles to begin a scanline? (minimum time it takes to init the first polygon?)
// (Amount of time before the end of the cycle a scanline must abort?)
class Renderer3D
{
public:
@ -349,6 +424,7 @@ public:
virtual void RenderFrame(GPU& gpu) = 0;
virtual void RestartFrame(GPU& gpu) {};
virtual u32* GetLine(int line) = 0;
virtual void ScanlineSync(int line) {};
virtual void Blit(const GPU& gpu) {};
virtual void SetupAccelFrame() {}

View File

@ -19,6 +19,7 @@
#include "GPU3D_Soft.h"
#include <algorithm>
#include <initializer_list>
#include <stdio.h>
#include <string.h>
#include "NDS.h"
@ -138,6 +139,108 @@ void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept
}
}
bool SoftRenderer::DoTimings(s32 cycles, s32* timingcounter)
{
    // Charge `cycles` to the caller's running counter, then report whether the
    // total is still within ScanlineTimeout (false = the budget was exceeded).
    *timingcounter += cycles;
    return RasterTiming + *timingcounter <= ScanlineTimeout;
}
bool SoftRenderer::CheckTimings(s32 cycles, s32* timingcounter)
{
    // Query only: the counter is left untouched.
    // True when at least `cycles` cycles remain before ScanlineTimeout.
    return RasterTiming + *timingcounter <= ScanlineTimeout - cycles;
}
u32 SoftRenderer::DoTimingsPixels(s32 pixels, s32* timingcounter)
{
    // Adds the cost of a span of `pixels` pixels to the timing counter and returns
    // how many of those pixels do not fit before ScanlineTimeout (0 if all fit).

    // the first NumFreePixels pixels of a polygon scanline cost nothing (for some reason?)
    if (pixels <= NumFreePixels) return 0;

    const s32 billable = pixels - NumFreePixels;
    *timingcounter += billable;

    // positive when the counter has run past the timeout
    const s32 overshoot = -(ScanlineTimeout - (RasterTiming + *timingcounter));
    if (overshoot <= 0) return 0;

    // give back the part that didn't fit, so the counter ends exactly at the timeout
    *timingcounter -= overshoot;
    return overshoot;
}
void SoftRenderer::FindFirstPolyDoTimings(int npolys, s32 y, int* firstpolyeven, int* firstpolyodd, s32* timingcountereven, s32*timingcounterodd)
{
// TODO: actually figure this out
// The First Polygon in each scanline pair has some additional timing penalties (presumably due to pipelining of the rasterizer)
bool fixeddelay = false;
bool perslope = false;
bool etc = false;
for (*firstpolyeven = 0; *firstpolyeven < npolys; (*firstpolyeven)++)
{
RendererPolygon* rp = &PolygonList[*firstpolyeven];
Polygon* polygon = rp->PolyData;
if (y >= polygon->YTop && y <= polygon->YBottom)
{
fixeddelay = true;
break;
/*if (y == polygon->YBottom) break;
if (y == polygon->YTop) {perslope = true; break;}
else if ((y == polygon->Vertices[rp->NextVL]->FinalPosition[1] || y == polygon->Vertices[rp->CurVL]->FinalPosition[1]) ||
(y == polygon->Vertices[rp->NextVR]->FinalPosition[1] || y == polygon->Vertices[rp->CurVR]->FinalPosition[1]))
{
perslope = true;
}
else etc = true;
break;*/
}
}
y++;
for (*firstpolyodd = 0; *firstpolyodd < npolys; (*firstpolyodd)++)
{
RendererPolygon* rp = &PolygonList[*firstpolyodd];
Polygon* polygon = rp->PolyData;
if (y >= polygon->YTop && y <= polygon->YBottom)
{
fixeddelay = true;
break;
/*if (y == polygon->YBottom) break;
if (y == polygon->YTop) {perslope = true; break;}
else if ((y == polygon->Vertices[rp->NextVL]->FinalPosition[1] || y == polygon->Vertices[rp->CurVL]->FinalPosition[1]) ||
(y == polygon->Vertices[rp->NextVR]->FinalPosition[1] || y == polygon->Vertices[rp->CurVR]->FinalPosition[1]))
{
perslope = true;
}
else etc = true;
break;*/
}
}
*timingcountereven = fixeddelay ? FirstPolyDelay : 0;// + perslope*FirstPerSlope + etc*2;
*timingcounterodd = fixeddelay ? FirstPolyDelay : 0;// + perslope*FirstPerSlope + etc*2;
/*if (!perslope)
{
*timingcountereven += etc*2;// + perslope*FirstPerSlope + etc*2;
*timingcounterodd += etc*2;// + perslope*FirstPerSlope + etc*2;
}
else
{
*timingcountereven += perslope*FirstPerSlope;// + perslope*FirstPerSlope + etc*2;
*timingcounterodd += perslope*FirstPerSlope;// + perslope*FirstPerSlope + etc*2;
}*/
}
void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const
{
u32 vramaddr = (texparam & 0xFFFF) << 3;
@ -705,7 +808,31 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
}
}
void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y)
// Steps the left and right edge slopes of `rp` and stores the values returned by
// SlopeL.Step() / SlopeR.Step() in XL / XR.
void SoftRenderer::Step(RendererPolygon* rp)
{
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
}
void SoftRenderer::CheckSlope(RendererPolygon* rp, s32 y)
{
    // Sets up the next left/right edge of the polygon once scanline `y` has reached
    // the Y position of the next vertex on that side.
    Polygon* poly = rp->PolyData;

    // nothing to do for polygons whose top and bottom are on the same scanline
    if (poly->YTop == poly->YBottom) return;

    // left side first...
    const bool leftReached = y >= poly->Vertices[rp->NextVL]->FinalPosition[1];
    if (leftReached && rp->CurVL != poly->VBottom)
        SetupPolygonLeftEdge(rp, y);

    // ...then the right side, checked only after the left edge has been handled
    const bool rightReached = y >= poly->Vertices[rp->NextVR]->FinalPosition[1];
    if (rightReached && rp->CurVR != poly->VBottom)
        SetupPolygonRightEdge(rp, y);
}
bool SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, s32* timingcounter)
{
Polygon* polygon = rp->PolyData;
@ -728,18 +855,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
PrevIsShadowMask = true;
if (polygon->YTop != polygon->YBottom)
{
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
{
SetupPolygonLeftEdge(rp, y);
}
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
{
SetupPolygonRightEdge(rp, y);
}
}
CheckSlope(rp, y);
Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend;
@ -748,6 +864,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start;
Interpolator<1>* interp_end;
bool abortscanline; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL;
xend = rp->XR;
@ -831,7 +948,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// similarly, we can perform alpha test early (checkme)
if (wireframe) polyalpha = 31;
if (polyalpha <= gpu3d.RenderAlphaRef) return;
if (polyalpha <= gpu3d.RenderAlphaRef) return false; // TODO: check how this impacts timings?
// in wireframe mode, there are special rules for equal Z (TODO)
@ -841,25 +958,42 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
int edge;
s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr);
xend += 1;
Interpolator<0> interpX(xstart, xend, wl, wr);
if (x < 0) x = 0;
s32 xlimit;
// determine if the span can be rendered within the time allotted to the scanline
s32 diff = DoTimingsPixels(xend-x, timingcounter);
if (diff != 0)
{
xend -= diff;
r_edgelen -= diff;
abortscanline = true;
}
else abortscanline = false;
// we cap it to 256 *after* counting the cycles, because yes, it tries to render oob pixels.
if (xend > 256)
{
r_edgelen += 256 - xend;
xend = 256;
}
// for shadow masks: set stencil bits where the depth test fails.
// draw nothing.
// part 1: left edge
edge = yedge | 0x1;
xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
if (xlimit > xend) xlimit = xend;
if (!l_filledge) x = xlimit;
else
for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x);
@ -879,13 +1013,12 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 2: polygon inside
edge = yedge;
xlimit = xend-r_edgelen+1;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend-r_edgelen;
if (xlimit > xend) xlimit = xend;
if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x);
@ -905,13 +1038,12 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 3: right edge
edge = yedge | 0x2;
xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend;
if (r_filledge)
for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
interpX.SetX(x);
@ -929,14 +1061,13 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
}
}
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
Step(rp);
return abortscanline;
}
void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y)
bool SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, s32* timingcounter)
{
Polygon* polygon = rp->PolyData;
u32 polyattr = (polygon->Attr & 0x3F008000);
if (!polygon->FacingView) polyattr |= (1<<4);
@ -953,18 +1084,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
PrevIsShadowMask = false;
if (polygon->YTop != polygon->YBottom)
{
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
{
SetupPolygonLeftEdge(rp, y);
}
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
{
SetupPolygonRightEdge(rp, y);
}
}
CheckSlope(rp, y);
Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend;
@ -973,6 +1093,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start;
Interpolator<1>* interp_end;
bool abortscanline; // to abort the rest of the scanline after finishing this polygon
xstart = rp->XL;
xend = rp->XR;
@ -1091,18 +1212,35 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
int edge;
s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr);
xend += 1;
Interpolator<0> interpX(xstart, xend, wl, wr);
if (x < 0) x = 0;
s32 xlimit;
s32 xcov = 0;
// determine if the span can be rendered within the time allotted to the scanline
s32 diff = DoTimingsPixels(xend-x, timingcounter);
if (diff != 0)
{
xend -= diff;
r_edgelen -= diff;
abortscanline = true;
}
else abortscanline = false;
// we cap it to 256 *after* counting the cycles, because yes, it tries to render oob pixels.
if (xend > 256)
{
r_edgelen += 256 - xend;
xend = 256;
}
// part 1: left edge
edge = yedge | 0x1;
xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
if (xlimit > xend) xlimit = xend;
if (l_edgecov & (1<<31))
{
xcov = (l_edgecov >> 12) & 0x3FF;
@ -1110,10 +1248,9 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
}
if (!l_filledge) x = xlimit;
else
for (; x < xlimit; x++)
else for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows
@ -1201,15 +1338,13 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 2: polygon inside
edge = yedge;
xlimit = xend-r_edgelen+1;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend-r_edgelen;
if (xlimit > xend) xlimit = xend;
if (wireframe && !edge) x = std::max(x, xlimit);
else
for (; x < xlimit; x++)
else for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows
@ -1290,8 +1425,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 3: right edge
edge = yedge | 0x2;
xlimit = xend+1;
if (xlimit > 256) xlimit = 256;
xlimit = xend;
if (r_edgecov & (1<<31))
{
xcov = (r_edgecov >> 12) & 0x3FF;
@ -1301,7 +1435,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
if (r_filledge)
for (; x < xlimit; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
u32 dstattr = AttrBuffer[pixeladdr];
// check stencil buffer for shadows
@ -1386,24 +1520,36 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow);
}
}
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
Step(rp);
return abortscanline;
}
void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys)
void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int firstpoly, int npolys, s32* timingcounter)
{
for (int i = 0; i < npolys; i++)
bool abort = false;
for (; firstpoly < npolys; firstpoly++)
{
RendererPolygon* rp = &PolygonList[i];
RendererPolygon* rp = &PolygonList[firstpoly];
Polygon* polygon = rp->PolyData;
if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
if (y == polygon->YBottom && y != polygon->YTop)
{
if (polygon->IsShadowMask)
RenderShadowMaskScanline(gpu.GPU3D, rp, y);
if (!abort) abort = !DoTimings(EmptyPolyScanline, timingcounter);
}
else if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
{
if (!abort) abort = (!DoTimings(PerPolyScanline, timingcounter)
|| !CheckTimings(MinToStartPoly, timingcounter));
if (abort)
{
CheckSlope(rp, y);
Step(rp);
}
else if (polygon->IsShadowMask)
abort = RenderShadowMaskScanline(gpu.GPU3D, rp, y, timingcounter);
else
RenderPolygonScanline(gpu, rp, y);
abort = RenderPolygonScanline(gpu, rp, y, timingcounter);
}
}
}
@ -1447,7 +1593,27 @@ u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const
return density;
}
void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
bool SoftRenderer::CheckEdgeMarkingPixel(u32 polyid, u32 z, u32 pixeladdr)
{
    // True when the pixel at `pixeladdr` has a different polygon ID (top 8 bits of
    // its attribute word) and `z` is less than that pixel's depth.
    if (polyid == (AttrBuffer[pixeladdr] >> 24)) return false;
    return z < DepthBuffer[pixeladdr];
}
bool SoftRenderer::CheckEdgeMarkingClearPlane(const GPU3D& gpu3d, u32 polyid, u32 z)
{
// for some reason it never checks against the bitmap clear plane?
if (polyid != gpu3d.RenderClearAttr1>>24)
{
u32 clearz = ((gpu3d.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
if (z < clearz) return true;
else return false;
}
else return false;
}
template <bool push>
void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y, bool checkprev, bool checknext)
{
// to consider:
// clearing all polygon fog flags if the master flag isn't set?
@ -1460,7 +1626,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
for (int x = 0; x < 256; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
u32 attr = AttrBuffer[pixeladdr];
if (!(attr & 0xF)) continue;
@ -1468,11 +1634,45 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
u32 polyid = attr >> 24; // opaque polygon IDs are used for edgemarking
u32 z = DepthBuffer[pixeladdr];
if (((polyid != (AttrBuffer[pixeladdr-1] >> 24)) && (z < DepthBuffer[pixeladdr-1])) ||
((polyid != (AttrBuffer[pixeladdr+1] >> 24)) && (z < DepthBuffer[pixeladdr+1])) ||
((polyid != (AttrBuffer[pixeladdr-ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr-ScanlineWidth])) ||
((polyid != (AttrBuffer[pixeladdr+ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr+ScanlineWidth])))
// check the pixel to the left
if (x == 0)
{
// edge marking bug emulation
if (checkprev ? CheckEdgeMarkingClearPlane(gpu3d, polyid, z) : // check against the clear plane
CheckEdgeMarkingPixel(polyid, z, pixeladdr-1 - ScanlineWidth)) // checks the right edge of the scanline 2 scanlines ago
goto pass;
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr-1)) goto pass; // normal check
// check the pixel to the right
if (x == 255)
{
// edge marking bug emulation
if (checknext ? CheckEdgeMarkingClearPlane(gpu3d, polyid, z) : // check against the clear plane
CheckEdgeMarkingPixel(polyid, z, pixeladdr+1 + ScanlineWidth)) // checks the left edge of the scanline 2 scanlines ahead
goto pass;
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr+1)) goto pass; // normal check
// check the pixel above
if (y == 0)
{
// edge marking bug emulation
if (CheckEdgeMarkingClearPlane(gpu3d, polyid, z)) goto pass; // check against the clear plane
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr-ScanlineWidth)) goto pass; // normal check
// check the pixel below
if (y == 191)
{
// edge marking bug emulation
if (CheckEdgeMarkingClearPlane(gpu3d, polyid, z)) goto pass; // check against the clear plane
}
else if (CheckEdgeMarkingPixel(polyid, z, pixeladdr+ScanlineWidth)) goto pass; // normal check
if (false)
{
pass:
u16 edgecolor = gpu3d.RenderEdgeTable[polyid >> 3];
u32 edgeR = (edgecolor << 1) & 0x3E; if (edgeR) edgeR++;
u32 edgeG = (edgecolor >> 4) & 0x3E; if (edgeG) edgeG++;
@ -1508,7 +1708,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
for (int x = 0; x < 256; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
u32 density, srccolor, srcR, srcG, srcB, srcA;
u32 attr = AttrBuffer[pixeladdr];
@ -1573,7 +1773,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
for (int x = 0; x < 256; x++)
{
u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
u32 attr = AttrBuffer[pixeladdr];
if (!(attr & 0xF)) continue;
@ -1615,39 +1815,17 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y)
ColorBuffer[pixeladdr] = topR | (topG << 8) | (topB << 16) | (topA << 24);
}
}
if constexpr (push)
{
memcpy(&FinalBuffer[y*ScanlineWidth], &ColorBuffer[y*ScanlineWidth], ScanlineWidth*4);
Platform::Semaphore_Post(Sema_ScanlineCount);
}
}
void SoftRenderer::ClearBuffers(const GPU& gpu)
{
u32 clearz = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
u32 polyid = gpu.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID
// fill screen borders for edge marking
for (int x = 0; x < ScanlineWidth; x++)
{
ColorBuffer[x] = 0;
DepthBuffer[x] = clearz;
AttrBuffer[x] = polyid;
}
for (int x = ScanlineWidth; x < ScanlineWidth*193; x+=ScanlineWidth)
{
ColorBuffer[x] = 0;
DepthBuffer[x] = clearz;
AttrBuffer[x] = polyid;
ColorBuffer[x+257] = 0;
DepthBuffer[x+257] = clearz;
AttrBuffer[x+257] = polyid;
}
for (int x = ScanlineWidth*193; x < ScanlineWidth*194; x++)
{
ColorBuffer[x] = 0;
DepthBuffer[x] = clearz;
AttrBuffer[x] = polyid;
}
// clear the screen
if (gpu.GPU3D.RenderDispCnt & (1<<14))
@ -1655,7 +1833,7 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
u8 xoff = (gpu.GPU3D.RenderClearAttr2 >> 16) & 0xFF;
u8 yoff = (gpu.GPU3D.RenderClearAttr2 >> 24) & 0xFF;
for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth)
for (int y = 0; y < 192; y++)
{
for (int x = 0; x < 256; x++)
{
@ -1671,7 +1849,7 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF;
u32 pixeladdr = FirstPixelOffset + y + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
ColorBuffer[pixeladdr] = color;
DepthBuffer[pixeladdr] = z;
AttrBuffer[pixeladdr] = polyid | (val3 & 0x8000);
@ -1684,6 +1862,8 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
}
else
{
u32 clearz = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
// TODO: confirm color conversion
u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++;
u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++;
@ -1693,11 +1873,11 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
polyid |= (gpu.GPU3D.RenderClearAttr1 & 0x8000);
for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth)
for (int y = 0; y < 192; y++)
{
for (int x = 0; x < 256; x++)
{
u32 pixeladdr = FirstPixelOffset + y + x;
u32 pixeladdr = (y*ScanlineWidth) + x;
ColorBuffer[pixeladdr] = color;
DepthBuffer[pixeladdr] = clearz;
AttrBuffer[pixeladdr] = polyid;
@ -1706,7 +1886,46 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
}
}
void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys)
// Brings the rdlines_count bookkeeping up to date with the current RasterTiming.
// For every RDDecrement[] entry that RasterTiming has already reached, one
// waiting scanline is consumed (slwaitingrd--) and the read index advances
// (nextreadrd++). gpu.GPU3D.RDLinesTemp is then lowered to slwaitingrd whenever
// it is larger, so this macro only ever moves RDLinesTemp downward.
// Expands in place: relies on `gpu`, `slwaitingrd` and `nextreadrd` being in
// scope at the expansion site. The expansion ends in a closing brace, so it is
// written without a trailing semicolon.
#define RDLINES_COUNT_INCREMENT\
/* feels wrong, needs improvement */\
while (RasterTiming >= RDDecrement[nextreadrd])\
{\
slwaitingrd--;\
nextreadrd++;\
/* update rdlines_count register */\
if (gpu.GPU3D.RDLinesTemp > slwaitingrd) gpu.GPU3D.RDLinesTemp = slwaitingrd;\
}
// Models scanlines being drained from the 48-scanline buffer.
// The loop runs while at least 47 scanlines are waiting OR RasterTiming has
// reached SLRead[nextread]. If it runs only because of the waiting count
// (RasterTiming is still before the next read time), the renderer is stalled:
// timespent is replaced by the skipped interval plus EMFixNum, and RasterTiming
// jumps forward to SLRead[nextread]. Every iteration then removes one waiting
// scanline and advances nextread.
// Expands in place: relies on `scanlineswaiting`, `nextread` and `timespent`
// being in scope at the expansion site; written without a trailing semicolon.
#define SCANLINE_BUFFER_SIM\
/* simulate the process of scanlines being read from the 48 scanline buffer */\
while (scanlineswaiting >= 47 || RasterTiming >= SLRead[nextread])\
{\
if (RasterTiming < SLRead[nextread])\
{\
timespent = SLRead[nextread] - RasterTiming;\
timespent += EMFixNum; /* fixes edge marking bug emulation. not sure why this is needed? */\
RasterTiming = SLRead[nextread];\
}\
scanlineswaiting--;\
nextread++;\
}
// Renders the scanline pair (y, y+1) and accounts for the time it consumed.
//  1. ScanlineTimeout is recomputed as SLRead[y-1] - (PreReadCutoff+FinalPassLen).
//  2. FindFirstPolyDoTimings receives pointers to firstpolyeven/firstpolyodd and
//     to both timing counters (presumably filling them in - confirm against its
//     definition); RenderScanline is then run for y and for y+1 with them.
//  3. The old timespent is saved in prevtimespent; the new timespent is the
//     largest of the two timing counters and FinalPassLen, and is added to
//     RasterTiming.
//  4. RasterTiming additionally gains clamp(ScanlineTimeout - RasterTiming, 0, 12).
//  5. If RasterTiming has reached ScanlineTimeout and UnderflowFlagVCount still
//     holds (u16)-1, it is set to y when y is odd, otherwise to y-1.
// `y` is substituted unparenthesized (y-1, y+1, y&1), so pass a literal or a
// plain identifier rather than a compound expression.
// Expands to several statements that rely on `gpu`, `j`, `firstpolyeven`,
// `firstpolyodd`, `rastertimingeven`, `rastertimingodd`, `timespent` and
// `prevtimespent` at the expansion site; do not use it as the unbraced body of
// an if/for, since only the first statement would be governed.
#define RENDER_SCANLINES(y)\
/* update sl timeout */\
ScanlineTimeout = SLRead[y-1] - (PreReadCutoff+FinalPassLen);\
\
FindFirstPolyDoTimings(j, y, &firstpolyeven, &firstpolyodd, &rastertimingeven, &rastertimingodd);\
RenderScanline(gpu, y, firstpolyeven, j, &rastertimingeven);\
RenderScanline(gpu, y+1, firstpolyodd, j, &rastertimingodd);\
\
prevtimespent = timespent;\
RasterTiming += timespent = std::max(std::initializer_list<s32> {rastertimingeven, rastertimingodd, FinalPassLen});\
RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12);\
\
/* set the underflow flag if one of the scanlines came within 14 cycles of visible underflow */\
if ((ScanlineTimeout <= RasterTiming) && (gpu.GPU3D.UnderflowFlagVCount == (u16)-1)) gpu.GPU3D.UnderflowFlagVCount = y - (y&1 ? 0 : 1);
void SoftRenderer::RenderPolygonsTiming(GPU& gpu, Polygon** polygons, int npolys)
{
int j = 0;
for (int i = 0; i < npolys; i++)
@ -1715,24 +1934,82 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg
SetupPolygon(&PolygonList[j++], polygons[i]);
}
RenderScanline(gpu, 0, j);
// reset scanline trackers
gpu.GPU3D.UnderflowFlagVCount = -1;
gpu.GPU3D.RDLinesTemp = 63;
RasterTiming = 0;
ScanlineTimeout = SLRead[2] - (PreReadCutoff+FinalPassLen+4); // TEMP: should be infinity, but i dont want it to break due to not being set up to handle this properly. //0x7FFFFFFF; // CHECKME: first scanline pair timeout.
s32 rastertimingeven, rastertimingodd; // always init to 0 at the start of a scanline render
s32 scanlineswaiting = 0, slwaitingrd = 0;
s32 nextread = 0, nextreadrd = 0;
u32 timespent, prevtimespent;
int firstpolyeven, firstpolyodd;
for (s32 y = 1; y < 192; y++)
FindFirstPolyDoTimings(j, 0, &firstpolyeven, &firstpolyodd, &rastertimingeven, &rastertimingodd);
// scanlines are rendered in pairs of two
RenderScanline(gpu, 0, firstpolyeven, j, &rastertimingeven);
RenderScanline(gpu, 1, firstpolyodd, j, &rastertimingodd);
// it can't proceed to the next scanline pair until all other steps are done (both scanlines in the pair, and the final pass)
RasterTiming = timespent = std::max(std::initializer_list<s32> {rastertimingeven, rastertimingodd, FinalPassLen});
// 12 cycles at the end of a "timeout" are always used, for whatever reason
RasterTiming += std::clamp(ScanlineTimeout - RasterTiming, 0, 12); // should probably just be += 12 tbh but i'll leave it for now
// if first pair was not delayed past the first read, then later scanlines cannot either
// this allows us to implement a fast path
//if (SLRead[0] - timespent + ScanlinePushDelay >= 256)
{
RenderScanline(gpu, y, j);
ScanlineFinalPass(gpu.GPU3D, y-1);
RENDER_SCANLINES(2)
if (threaded)
// Notify the main thread that we're done with a scanline.
Platform::Semaphore_Post(Sema_ScanlineCount);
scanlineswaiting++;
slwaitingrd++;
SCANLINE_BUFFER_SIM
RDLINES_COUNT_INCREMENT
// final pass pairs are the previous scanline pair offset -1 scanline, thus we start with only building one
ScanlineFinalPass<true>(gpu.GPU3D, 0, true, timespent >= EMGlitchThreshhold);
// main loop
for (int y = 4; y < 192; y+=2)
{
RENDER_SCANLINES(y)
scanlineswaiting += 2;
slwaitingrd += 2;
SCANLINE_BUFFER_SIM
RDLINES_COUNT_INCREMENT
ScanlineFinalPass<true>(gpu.GPU3D, y-3, prevtimespent >= EMGlitchThreshhold || y-3 == 1, timespent >= EMGlitchThreshhold);
ScanlineFinalPass<true>(gpu.GPU3D, y-2, prevtimespent >= EMGlitchThreshhold, timespent >= EMGlitchThreshhold);
}
ScanlineFinalPass(gpu.GPU3D, 191);
scanlineswaiting += 2;
slwaitingrd += 2;
prevtimespent = timespent;
if (threaded)
// If this renderer is threaded, notify the main thread that we're done with the frame.
Platform::Semaphore_Post(Sema_ScanlineCount);
// emulate read timings one last time, since it shouldn't matter after this
// additionally, don't bother tracking rdlines anymore, since it shouldn't be able to decrement any further (CHECKME)
SCANLINE_BUFFER_SIM
// finish the last 3 scanlines
ScanlineFinalPass<true>(gpu.GPU3D, 189, prevtimespent >= EMGlitchThreshhold, timespent >= EMGlitchThreshhold);
ScanlineFinalPass<true>(gpu.GPU3D, 190, prevtimespent >= EMGlitchThreshhold, true);
ScanlineFinalPass<true>(gpu.GPU3D, 191, timespent >= EMGlitchThreshhold, true);
}
/*else
{
Coming soon^tm to a melonDS near you
}*/
}
#undef RENDER_SCANLINES
#undef SCANLINE_BUFFER_SIM
#undef RDLINES_COUNT_INCREMENT
void SoftRenderer::VCount144(GPU& gpu)
{
@ -1757,8 +2034,14 @@ void SoftRenderer::RenderFrame(GPU& gpu)
}
else if (!FrameIdentical)
{
//init internal buffer
ClearBuffers(gpu);
RenderPolygons(gpu, false, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
if (gpu.GPU3D.RenderingEnabled >= 3)
{
RenderPolygonsTiming(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
}
else memcpy(FinalBuffer, ColorBuffer, sizeof(FinalBuffer));
}
}
@ -1789,8 +2072,18 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
}
else
{
//init internal buffer
ClearBuffers(gpu);
RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
if (gpu.GPU3D.RenderingEnabled >= 3)
{
RenderPolygonsTiming(gpu, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
}
else
{
memcpy(FinalBuffer, ColorBuffer, sizeof(FinalBuffer));
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
}
}
// Tell the main thread that we're done rendering
@ -1800,19 +2093,23 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
RenderThreadRendering = false;
}
}
u32* SoftRenderer::GetLine(int line)
void SoftRenderer::ScanlineSync(int line)
{
// only used in accurate mode (timings must be emulated)
if (RenderThreadRunning.load(std::memory_order_relaxed))
{
if (line < 192)
// We need a scanline, so let's wait for the render thread to finish it.
// (both threads process scanlines from top-to-bottom,
// so we don't need to wait for a specific row)
{
// wait for two scanlines here, since scanlines render in pairs.
Platform::Semaphore_Wait(Sema_ScanlineCount);
Platform::Semaphore_Wait(Sema_ScanlineCount);
}
}
}
return &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset];
u32* SoftRenderer::GetLine(int line)
{
return &FinalBuffer[line*ScanlineWidth];
}
}

View File

@ -40,6 +40,7 @@ public:
void RenderFrame(GPU& gpu) override;
void RestartFrame(GPU& gpu) override;
u32* GetLine(int line) override;
void ScanlineSync(int line) override;
void SetupRenderThread(GPU& gpu);
void EnableRenderThread();
@ -445,36 +446,54 @@ private:
};
RendererPolygon PolygonList[2048];
bool DoTimings(s32 cycles, s32* timingcounter);
bool CheckTimings(s32 cycles, s32* timingcounter);
u32 DoTimingsPixels(s32 pixels, s32* timingcounter);
void FindFirstPolyDoTimings(int npolys, s32 y, int* firstpolyeven, int* firstpolyodd, s32* timingcountereven, s32*timingcounterodd);
void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const;
u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const;
void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y);
void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y);
void RenderScanline(const GPU& gpu, s32 y, int npolys);
void Step(RendererPolygon* rp);
void CheckSlope(RendererPolygon* rp, s32 y);
bool RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y, s32* timingcounter);
bool RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y, s32* timingcounter);
void RenderScanline(const GPU& gpu, s32 y, int firstpoly, int npolys, s32* timingcounter);
u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const;
void ScanlineFinalPass(const GPU3D& gpu3d, s32 y);
bool CheckEdgeMarkingPixel(u32 polyid, u32 z, u32 pixeladdr);
bool CheckEdgeMarkingClearPlane(const GPU3D& gpu3d, u32 polyid, u32 z);
template <bool push> void ScanlineFinalPass(const GPU3D& gpu3d, s32 y, bool checkprev, bool checknext);
void ClearBuffers(const GPU& gpu);
void RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys);
void RenderPolygonsFast(GPU& gpu, Polygon** polygons, int npolys);
void RenderPolygonsTiming(GPU& gpu, Polygon** polygons, int npolys);
void RenderThreadFunc(GPU& gpu);
// counters for scanline rasterization timings
s32 ScanlineTimeout;
s32 RasterTiming;
// buffer dimensions are 258x194 to add a offscreen 1px border
// which simplifies edge marking tests
// buffer is duplicated to keep track of the two topmost pixels
// TODO: check if the hardware can accidentally plot pixels
// offscreen in that border
static constexpr int ScanlineWidth = 258;
static constexpr int NumScanlines = 194;
static constexpr int BufferSize = ScanlineWidth * NumScanlines;
static constexpr int FirstPixelOffset = ScanlineWidth + 1;
static constexpr int ScanlineWidth = 256;
static constexpr int NumScanlinesIntBuf = 192;
//static constexpr int NumScanlinesRD = 48;
static constexpr int NumScanlinesFinal = 192;
static constexpr int BufferSize = ScanlineWidth * NumScanlinesIntBuf;
//static constexpr int RDBufferSize = ScanlineWidth * NumScanlinesRD;
static constexpr int FinalBufferSize = ScanlineWidth * NumScanlinesFinal;
u32 ColorBuffer[BufferSize * 2];
u32 DepthBuffer[BufferSize * 2];
u32 AttrBuffer[BufferSize * 2];
//u32 RDBuffer[RDBufferSize]; // is this buffer ever initialized by hw before writing to it? what is its initial value? can you transfer 3d framebuffer data between games?
u32 FinalBuffer[FinalBufferSize];
// attribute buffer:
// bit0-3: edge flags (left/right/top/bottom)

View File

@ -91,8 +91,7 @@ add_compile_definitions(ARCHIVE_SUPPORT_ENABLED)
add_executable(melonDS ${SOURCES_QT_SDL})
add_subdirectory("../../net"
"${CMAKE_BINARY_DIR}/net"
)
${CMAKE_BINARY_DIR}/net)
target_link_libraries(melonDS PRIVATE net-utils)
@ -171,10 +170,10 @@ if (BUILD_STATIC)
endif()
endif()
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../..")
target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../net")
target_include_directories(melonDS PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}"
"${CMAKE_CURRENT_SOURCE_DIR}/..")
if (USE_QT6)
target_include_directories(melonDS PUBLIC ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
else()

View File

@ -11,9 +11,9 @@ add_library(net-utils STATIC
MPInterface.cpp
)
target_include_directories(net-utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(net-utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
# Include paths for net-utils: this directory and its parent. PUBLIC applies
# them when compiling net-utils itself and propagates them to every target
# that links against net-utils.
target_include_directories(net-utils PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}"
"${CMAKE_CURRENT_SOURCE_DIR}/..")
option(USE_SYSTEM_LIBSLIRP "Use system libslirp instead of the bundled version" OFF)
if (USE_SYSTEM_LIBSLIRP)