Compute shader renderer (#2041)

* nothing works yet

* don't double buffer 3D framebuffers for the GL Renderer
looks like leftovers from when 3D+2D composition was done in the frontend

* oops

* it works!

* implement display capture for compute renderer
it's actually just all stolen from the regular OpenGL renderer

* fix bad indirect call

* handle cleanup properly

* add hires rendering to the compute shader renderer

* fix UB
also misc changes to use more unsigned multiplication
also fix framebuffer resize

* correct edge filling behaviour when AA is disabled

* fix full color textures

* fix edge marking (polygon ID is 6 bits, not 5)
also make the code a bit nicer
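
A minimal illustration of the fix described above (hypothetical helper, not the PR's shader code): the polygon ID is a 6-bit field in bits 24-29 of the polygon attributes, so masking with 0x1F drops the top bit and edge marking treats polygons whose IDs differ only in bit 5 as identical.

    #include <cstdint>

    // Hypothetical helper for illustration: extract the 6-bit polygon ID
    // (bits 24-29 of the polygon attributes). With a 5-bit mask (0x1F),
    // IDs such as 3 and 35 compare equal, so no edge is marked between
    // their polygons.
    inline uint32_t PolygonID(uint32_t polygonAttr)
    {
        return (polygonAttr >> 24) & 0x3F; // 6-bit mask; 0x1F was the bug
    }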

* take all edge cases into account for XMin/XMax calculation

* use hires coordinate again

* stop using fixed-size buffers based on the scale factor in shaders
this makes shader compile times tolerable on Wintel
- beginning of the shader cache
- increase size of tile idx in workdesc to 20 bits
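
A sketch of the idea behind this change (hypothetical host-side code; the names and GLSL strings are not from the PR): if the scale factor is baked into the shader source as a constant buffer size, every scale factor produces a different shader, and the large fixed-size arrays are what some drivers compile very slowly. Declaring the storage as a runtime-sized SSBO array keeps a single shader for all scale factors.

    #include <string>

    // Hypothetical illustration of the two approaches.
    std::string BuildTileBufferDecl(int scaleFactor, bool bakeSizeIntoShader)
    {
        if (bakeSizeIntoShader)
            // old style: array size depends on the scale factor, so each
            // scale factor is a separate (and slow to compile) shader
            return "#define ScaleFactor " + std::to_string(scaleFactor) + "\n"
                   "layout (std430, binding = 0) buffer TileBuffer"
                   " { uint Tiles[256 * ScaleFactor * ScaleFactor]; };\n";

        // new style: runtime-sized array, actual size comes from the buffer
        // allocated at runtime, so one shader covers every scale factor
        return "layout (std430, binding = 0) buffer TileBuffer"
               " { uint Tiles[]; };\n";
    }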

* apparently & is not defined on bvec4
why does this even compile on Intel and Nvidia?

* put the texture cache into its own file

* add compute shader renderer properly to the GUI
also add an option to toggle using high resolution vertex coordinates

* unbind sampler object in compute shader renderer

* fix GetRangedBitMask for 64-bit aligned, 64-bit wide ranges
pretty embarrassing
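
A minimal sketch of the bug class being fixed here (illustrative helper, not the actual NonStupidBitfield.h code): when the requested range is a full, 64-bit aligned word, the naive mask expression ends up shifting a 64-bit value by 64, which is undefined behaviour in C++ and on common hardware leaves the value unchanged instead of yielding an all-ones mask.

    #include <cstdint>

    // Illustrative helper: mask of `len` bits starting at `start` within one
    // 64-bit word (start + len <= 64). The full-word case must be handled
    // separately because `UINT64_C(1) << 64` is undefined behaviour.
    inline uint64_t RangedBitMask(uint32_t start, uint32_t len)
    {
        if (len >= 64)
            return ~UINT64_C(0);                     // whole word
        return ((UINT64_C(1) << len) - 1) << start;  // partial word
    }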

* convert NonStupidBitfield.h back to LF-only newlines

* actually adapt to latest changes

* fix stupid merge

* actually make compute shader renderer work with newest changes

* show progress on shader compilation

* remove merge leftover
Author: RSDuck (committed by GitHub), 2024-05-13 17:17:39 +02:00
parent c85a2103bb, commit 043244a56d
35 changed files with 4389 additions and 382 deletions

@@ -52,10 +52,12 @@
#include "DSi_I2C.h"
#include "GPU3D_Soft.h"
#include "GPU3D_OpenGL.h"
#include "GPU3D_Compute.h"
#include "Savestate.h"
#include "ROMManager.h"
#include "EmuThread.h"
//#include "ArchiveUtil.h"
//#include "CameraManager.h"
@@ -94,9 +96,8 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent)
}
std::unique_ptr<NDS> EmuThread::CreateConsole(
std::unique_ptr<melonDS::NDSCart::CartCommon>&& ndscart,
std::unique_ptr<melonDS::GBACart::CartCommon>&& gbacart
) noexcept
std::unique_ptr<melonDS::NDSCart::CartCommon> &&ndscart,
std::unique_ptr<melonDS::GBACart::CartCommon> &&gbacart) noexcept
{
auto arm7bios = ROMManager::LoadARM7BIOS();
if (!arm7bios)
@@ -326,21 +327,12 @@ void EmuThread::run()
videoRenderer = 0;
}
if (videoRenderer == 0)
{ // If we're using the software renderer...
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>(Config::Threaded3D != 0));
}
else
{
auto glrenderer = melonDS::GLRenderer::New();
glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
NDS->GPU.SetRenderer3D(std::move(glrenderer));
}
updateRenderer();
Input::Init();
u32 nframes = 0;
double perfCountsSec = 1.0 / SDL_GetPerformanceFrequency();
perfCountsSec = 1.0 / SDL_GetPerformanceFrequency();
double lastTime = SDL_GetPerformanceCounter() * perfCountsSec;
double frameLimitError = 0.0;
double lastMeasureTime = lastTime;
@@ -451,20 +443,9 @@ void EmuThread::run()
videoRenderer = 0;
}
videoRenderer = screenGL ? Config::_3DRenderer : 0;
updateRenderer();
videoSettingsDirty = false;
if (videoRenderer == 0)
{ // If we're using the software renderer...
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>(Config::Threaded3D != 0));
}
else
{
auto glrenderer = melonDS::GLRenderer::New();
glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
NDS->GPU.SetRenderer3D(std::move(glrenderer));
}
}
// process input and hotkeys
@@ -512,7 +493,16 @@ void EmuThread::run()
// emulate
u32 nlines = NDS->RunFrame();
u32 nlines;
if (NDS->GPU.GetRenderer3D().NeedsShaderCompile())
{
compileShaders();
nlines = 0;
}
else
{
nlines = NDS->RunFrame();
}
if (ROMManager::NDSSave)
ROMManager::NDSSave->CheckFlush();
@@ -750,3 +740,53 @@ bool EmuThread::emuIsActive()
{
return (RunningSomething == 1);
}
void EmuThread::updateRenderer()
{
if (videoRenderer != lastVideoRenderer)
{
printf("creating renderer %d\n", videoRenderer);
switch (videoRenderer)
{
case renderer3D_Software:
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>());
break;
case renderer3D_OpenGL:
NDS->GPU.SetRenderer3D(GLRenderer::New());
break;
case renderer3D_OpenGLCompute:
NDS->GPU.SetRenderer3D(ComputeRenderer::New());
break;
default: __builtin_unreachable();
}
}
lastVideoRenderer = videoRenderer;
switch (videoRenderer)
{
case renderer3D_Software:
static_cast<SoftRenderer&>(NDS->GPU.GetRenderer3D()).SetThreaded(Config::Threaded3D, NDS->GPU);
break;
case renderer3D_OpenGL:
static_cast<GLRenderer&>(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
break;
case renderer3D_OpenGLCompute:
static_cast<ComputeRenderer&>(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_ScaleFactor, Config::GL_HiresCoordinates);
break;
default: __builtin_unreachable();
}
}
void EmuThread::compileShaders()
{
int currentShader, shadersCount;
u64 startTime = SDL_GetPerformanceCounter();
// kind of hacky to look at the wallclock, though it is easier
// than disabling vsync
do
{
NDS->GPU.GetRenderer3D().ShaderCompileStep(currentShader, shadersCount);
} while (NDS->GPU.GetRenderer3D().NeedsShaderCompile() &&
(SDL_GetPerformanceCounter() - startTime) * perfCountsSec < 1.0 / 6.0);
mainWindow->osdAddMessage(0, "Compiling shader %d/%d", currentShader+1, shadersCount);
}