Compute shader renderer (#2041)

* nothing works yet

* don't double buffer 3D framebuffers for the GL Renderer
looks like leftovers from when 3D+2D composition was done in the frontend

* oops

* it works!

* implement display capture for compute renderer
it's actually just all stolen from the regular OpenGL renderer

* fix bad indirect call

* handle cleanup properly

* add hires rendering to the compute shader renderer

* fix UB
also misc changes to use more unsigned multiplication
also fix framebuffer resize

* correct edge filling behaviour when AA is disabled

* fix full color textures

* fix edge marking (polygon id is 6-bit not 5)
also make the code a bit nicer

* take all edge cases into account for XMin/XMax calculation

* use hires coordinate again

* stop using fixed size buffers based on scale factor in shaders
this makes shader compile times tolerable on Wintel
- beginning of the shader cache
- increase size of tile idx in workdesc to 20 bits

* apparently & is not defined on bvec4
why does this even compile on Intel and Nvidia?

* put the texture cache into its own file

* add compute shader renderer properly to the GUI
also add option to toggle using high resolution vertex coordinates

* unbind sampler object in compute shader renderer

* fix GetRangedBitMask for 64 bit aligned 64 bits
pretty embarrassing

* convert NonStupidBitfield.h back to LF only new lines

* actually adapt to latest changes

* fix stupid merge

* actually make compute shader renderer work with newest changes

* show progress on shader compilation

* remove merge leftover
This commit is contained in:
RSDuck
2024-05-13 17:17:39 +02:00
committed by GitHub
parent c85a2103bb
commit 043244a56d
35 changed files with 4389 additions and 382 deletions

View File

@ -709,19 +709,17 @@ void ScreenPanelGL::initOpenGL()
glContext->MakeCurrent();
OpenGL::BuildShaderProgram(kScreenVS, kScreenFS, screenShaderProgram, "ScreenShader");
GLuint pid = screenShaderProgram[2];
glBindAttribLocation(pid, 0, "vPosition");
glBindAttribLocation(pid, 1, "vTexcoord");
glBindFragDataLocation(pid, 0, "oColor");
OpenGL::CompileVertexFragmentProgram(screenShaderProgram,
kScreenVS, kScreenFS,
"ScreenShader",
{{"vPosition", 0}, {"vTexcoord", 1}},
{{"oColor", 0}});
OpenGL::LinkShaderProgram(screenShaderProgram);
glUseProgram(screenShaderProgram);
glUniform1i(glGetUniformLocation(screenShaderProgram, "ScreenTex"), 0);
glUseProgram(pid);
glUniform1i(glGetUniformLocation(pid, "ScreenTex"), 0);
screenShaderScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize");
screenShaderTransformULoc = glGetUniformLocation(pid, "uTransform");
screenShaderScreenSizeULoc = glGetUniformLocation(screenShaderProgram, "uScreenSize");
screenShaderTransformULoc = glGetUniformLocation(screenShaderProgram, "uTransform");
// to prevent bleeding between both parts of the screen
// with bilinear filtering enabled
@ -769,21 +767,19 @@ void ScreenPanelGL::initOpenGL()
memset(zeroData, 0, sizeof(zeroData));
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 2, GL_RGBA, GL_UNSIGNED_BYTE, zeroData);
OpenGL::CompileVertexFragmentProgram(osdShader,
kScreenVS_OSD, kScreenFS_OSD,
"OSDShader",
{{"vPosition", 0}},
{{"oColor", 0}});
OpenGL::BuildShaderProgram(kScreenVS_OSD, kScreenFS_OSD, osdShader, "OSDShader");
glUseProgram(osdShader);
glUniform1i(glGetUniformLocation(osdShader, "OSDTex"), 0);
pid = osdShader[2];
glBindAttribLocation(pid, 0, "vPosition");
glBindFragDataLocation(pid, 0, "oColor");
OpenGL::LinkShaderProgram(osdShader);
glUseProgram(pid);
glUniform1i(glGetUniformLocation(pid, "OSDTex"), 0);
osdScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize");
osdPosULoc = glGetUniformLocation(pid, "uOSDPos");
osdSizeULoc = glGetUniformLocation(pid, "uOSDSize");
osdScaleFactorULoc = glGetUniformLocation(pid, "uScaleFactor");
osdScreenSizeULoc = glGetUniformLocation(osdShader, "uScreenSize");
osdPosULoc = glGetUniformLocation(osdShader, "uOSDPos");
osdSizeULoc = glGetUniformLocation(osdShader, "uOSDSize");
osdScaleFactorULoc = glGetUniformLocation(osdShader, "uScaleFactor");
const float osdvertices[6*2] =
{
@ -818,8 +814,7 @@ void ScreenPanelGL::deinitOpenGL()
glDeleteVertexArrays(1, &screenVertexArray);
glDeleteBuffers(1, &screenVertexBuffer);
OpenGL::DeleteShaderProgram(screenShaderProgram);
glDeleteProgram(screenShaderProgram);
for (const auto& [key, tex] : osdTextures)
{
@ -830,8 +825,7 @@ void ScreenPanelGL::deinitOpenGL()
glDeleteVertexArrays(1, &osdVertexArray);
glDeleteBuffers(1, &osdVertexBuffer);
OpenGL::DeleteShaderProgram(osdShader);
glDeleteProgram(osdShader);
glContext->DoneCurrent();
@ -885,7 +879,7 @@ void ScreenPanelGL::drawScreenGL()
glViewport(0, 0, w, h);
glUseProgram(screenShaderProgram[2]);
glUseProgram(screenShaderProgram);
glUniform2f(screenShaderScreenSizeULoc, w / factor, h / factor);
int frontbuf = emuThread->FrontBuffer;
@ -895,7 +889,7 @@ void ScreenPanelGL::drawScreenGL()
if (emuThread->NDS->GPU.GetRenderer3D().Accelerated)
{
// hardware-accelerated render
static_cast<GLRenderer&>(emuThread->NDS->GPU.GetRenderer3D()).GetCompositor().BindOutputTexture(frontbuf);
emuThread->NDS->GPU.GetRenderer3D().BindOutputTexture(frontbuf);
}
else
#endif
@ -936,7 +930,7 @@ void ScreenPanelGL::drawScreenGL()
u32 y = kOSDMargin;
glUseProgram(osdShader[2]);
glUseProgram(osdShader);
glUniform2f(osdScreenSizeULoc, w, h);
glUniform1f(osdScaleFactorULoc, factor);