actually make compute shader renderer work with newest changes

RSDuck 2024-05-12 23:14:28 +02:00
parent 78828ecfca
commit cb83956914
25 changed files with 221 additions and 176 deletions

View File

@@ -352,6 +352,7 @@ public:
virtual void SetupAccelFrame() {}
virtual void PrepareCaptureFrame() {}
virtual void BindOutputTexture(int buffer) {}
protected:
Renderer3D(bool Accelerated);
@@ -362,11 +363,4 @@ extern std::unique_ptr<Renderer3D> CurrentRenderer;
}
#include "GPU3D_Soft.h"
#ifdef OGLRENDERER_ENABLED
#include "GPU3D_OpenGL.h"
#include "GPU3D_Compute.h"
#endif
#endif
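The new BindOutputTexture() virtual on Renderer3D is what lets the frontend further down in this diff (ScreenPanelGL::drawScreenGL) drop its static_cast to GLRenderer. A minimal sketch of the call pattern this enables, with the surrounding frontend code assumed:

// Sketch: any accelerated Renderer3D now exposes its output texture
// through the base class; the default implementation is a no-op.
void DrawAcceleratedFrame(melonDS::Renderer3D& renderer, int frontbuf)
{
    if (renderer.Accelerated)
        renderer.BindOutputTexture(frontbuf); // no downcast needed
}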

View File

@@ -24,11 +24,11 @@
#include "GPU3D_Compute_shaders.h"
namespace GPU3D
namespace melonDS
{
ComputeRenderer::ComputeRenderer()
: Renderer3D(true), Texcache(TexcacheOpenGLLoader())
ComputeRenderer::ComputeRenderer(GLCompositor&& compositor)
: Renderer3D(true), Texcache(TexcacheOpenGLLoader()), CurGLCompositor(std::move(compositor))
{}
bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, const std::initializer_list<const char*>& defines)
@@ -64,7 +64,11 @@ void blah(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,con
std::unique_ptr<ComputeRenderer> ComputeRenderer::New()
{
std::unique_ptr<ComputeRenderer> result = std::unique_ptr<ComputeRenderer>(new ComputeRenderer());
std::optional<GLCompositor> compositor = GLCompositor::New();
if (!compositor)
return nullptr;
std::unique_ptr<ComputeRenderer> result = std::unique_ptr<ComputeRenderer>(new ComputeRenderer(std::move(*compositor)));
//glDebugMessageCallback(blah, NULL);
//glEnable(GL_DEBUG_OUTPUT);
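ComputeRenderer::New() can now fail and return nullptr when GLCompositor::New() does (for instance if the compositor shader fails to compile). A hedged usage sketch, caller code assumed:

// Sketch: callers should check the result before installing the renderer.
auto renderer = melonDS::ComputeRenderer::New();
if (!renderer)
{
    // compositor creation failed; fall back, e.g. to the software renderer
}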
@@ -174,26 +178,28 @@ void ComputeRenderer::DeleteShaders()
glDeleteProgram(program);
}
void ComputeRenderer::Reset()
void ComputeRenderer::Reset(GPU& gpu)
{
Texcache.Reset();
}
void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings)
void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates)
{
CurGLCompositor.SetScaleFactor(scale);
if (ScaleFactor != -1)
{
DeleteShaders();
}
ScaleFactor = settings.GL_ScaleFactor;
ScaleFactor = scale;
ScreenWidth = 256 * ScaleFactor;
ScreenHeight = 192 * ScaleFactor;
TilesPerLine = ScreenWidth/TileSize;
TileLines = ScreenHeight/TileSize;
HiresCoordinates = settings.GL_HiresCoordinates;
HiresCoordinates = highResolutionCoordinates;
MaxWorkTiles = TilesPerLine*TileLines*8;
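SetRenderSettings() no longer reads a GPU::RenderSettings struct; the scale factor arrives directly and the other dimensions are derived from it. A worked example, with TileSize being the renderer constant defined elsewhere in this class:

// Assuming scale = 2:
//   ScreenWidth  = 256 * 2 = 512,  ScreenHeight = 192 * 2 = 384
//   TilesPerLine = 512 / TileSize, TileLines    = 384 / TileSize
//   MaxWorkTiles = TilesPerLine * TileLines * 8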
@@ -271,7 +277,7 @@ void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings)
CompileShader(ShaderFinalPass[7], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking", "Fog"});
}
void ComputeRenderer::VCount144()
void ComputeRenderer::VCount144(GPU& gpu)
{
}
@@ -503,11 +509,9 @@ struct Variant
=> 20 Shader + 1x Shadow Mask
*/
void ComputeRenderer::RenderFrame()
void ComputeRenderer::RenderFrame(GPU& gpu)
{
//printf("render frame\n");
if (!Texcache.Update() && RenderFrameIdentical)
if (!Texcache.Update(gpu) && gpu.GPU3D.RenderFrameIdentical)
{
return;
}
@@ -530,11 +534,11 @@ void ComputeRenderer::RenderFrame()
u32 numVariants = 0, prevVariant, prevTexLayer;
Variant variants[MaxVariants];
bool enableTextureMaps = RenderDispCnt & (1<<0);
bool enableTextureMaps = gpu.GPU3D.RenderDispCnt & (1<<0);
for (int i = 0; i < RenderNumPolygons; i++)
for (int i = 0; i < gpu.GPU3D.RenderNumPolygons; i++)
{
Polygon* polygon = RenderPolygonRAM[i];
Polygon* polygon = gpu.GPU3D.RenderPolygonRAM[i];
u32 nverts = polygon->NumVertices;
u32 vtop = polygon->VTop, vbot = polygon->VBottom;
@@ -550,7 +554,7 @@ void ComputeRenderer::RenderFrame()
{
// if the whole texture attribute matches
// the texture layer will also match
Polygon* prevPolygon = RenderPolygonRAM[i - 1];
Polygon* prevPolygon = gpu.GPU3D.RenderPolygonRAM[i - 1];
foundVariant = prevPolygon->TexParam == polygon->TexParam
&& prevPolygon->TexPalette == polygon->TexPalette
&& (prevPolygon->Attr & 0x30) == (polygon->Attr & 0x30)
@@ -567,7 +571,7 @@ void ComputeRenderer::RenderFrame()
// we always need to look up the texture to get the layer of the array texture
if (enableTextureMaps && (polygon->TexParam >> 26) & 0x7)
{
Texcache.GetTexture(polygon->TexParam, polygon->TexPalette, variant.Texture, prevTexLayer, textureLastVariant);
Texcache.GetTexture(gpu, polygon->TexParam, polygon->TexPalette, variant.Texture, prevTexLayer, textureLastVariant);
bool wrapS = (polygon->TexParam >> 16) & 1;
bool wrapT = (polygon->TexParam >> 17) & 1;
bool mirrorS = (polygon->TexParam >> 18) & 1;
@@ -760,7 +764,7 @@ void ComputeRenderer::RenderFrame()
glBufferSubData(GL_TEXTURE_BUFFER, 0, numSetupIndices*4*2, YSpanIndices.data());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, RenderPolygonMemory);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, RenderNumPolygons*sizeof(RenderPolygon), RenderPolygons);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, gpu.GPU3D.RenderNumPolygons*sizeof(RenderPolygon), RenderPolygons);
// we haven't accessed image data yet, so we don't need to invalidate anything
}
@@ -777,22 +781,22 @@ void ComputeRenderer::RenderFrame()
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, WorkDescMemory);
MetaUniform meta;
meta.DispCnt = RenderDispCnt;
meta.NumPolygons = RenderNumPolygons;
meta.DispCnt = gpu.GPU3D.RenderDispCnt;
meta.NumPolygons = gpu.GPU3D.RenderNumPolygons;
meta.NumVariants = numVariants;
meta.AlphaRef = RenderAlphaRef;
meta.AlphaRef = gpu.GPU3D.RenderAlphaRef;
{
u32 r = (RenderClearAttr1 << 1) & 0x3E; if (r) r++;
u32 g = (RenderClearAttr1 >> 4) & 0x3E; if (g) g++;
u32 b = (RenderClearAttr1 >> 9) & 0x3E; if (b) b++;
u32 a = (RenderClearAttr1 >> 16) & 0x1F;
u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++;
u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++;
u32 b = (gpu.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++;
u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F;
meta.ClearColor = r | (g << 8) | (b << 16) | (a << 24);
meta.ClearDepth = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
meta.ClearAttr = RenderClearAttr1 & 0x3F008000;
meta.ClearDepth = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
meta.ClearAttr = gpu.GPU3D.RenderClearAttr1 & 0x3F008000;
}
for (u32 i = 0; i < 32; i++)
{
u32 color = RenderToonTable[i];
u32 color = gpu.GPU3D.RenderToonTable[i];
u32 r = (color << 1) & 0x3E;
u32 g = (color >> 4) & 0x3E;
u32 b = (color >> 9) & 0x3E;
@@ -804,11 +808,11 @@ void ComputeRenderer::RenderFrame()
}
for (u32 i = 0; i < 34; i++)
{
meta.ToonTable[i*4+1] = RenderFogDensityTable[i];
meta.ToonTable[i*4+1] = gpu.GPU3D.RenderFogDensityTable[i];
}
for (u32 i = 0; i < 8; i++)
{
u32 color = RenderEdgeTable[i];
u32 color = gpu.GPU3D.RenderEdgeTable[i];
u32 r = (color << 1) & 0x3E;
u32 g = (color >> 4) & 0x3E;
u32 b = (color >> 9) & 0x3E;
@@ -818,13 +822,13 @@ void ComputeRenderer::RenderFrame()
meta.ToonTable[i*4+2] = r | (g << 8) | (b << 16);
}
meta.FogOffset = RenderFogOffset;
meta.FogShift = RenderFogShift;
meta.FogOffset = gpu.GPU3D.RenderFogOffset;
meta.FogShift = gpu.GPU3D.RenderFogShift;
{
u32 fogR = (RenderFogColor << 1) & 0x3E; if (fogR) fogR++;
u32 fogG = (RenderFogColor >> 4) & 0x3E; if (fogG) fogG++;
u32 fogB = (RenderFogColor >> 9) & 0x3E; if (fogB) fogB++;
u32 fogA = (RenderFogColor >> 16) & 0x1F;
u32 fogR = (gpu.GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++;
u32 fogG = (gpu.GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++;
u32 fogB = (gpu.GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++;
u32 fogA = (gpu.GPU3D.RenderFogColor >> 16) & 0x1F;
meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24);
}
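The clear-color and fog-color blocks above both apply the DS's 5-bit to 6-bit component expansion: shift left, mask to 0x3E, then bump nonzero values by one. As a worked sketch, the sequence is equivalent to:

// A 5-bit component v expands to (v == 0) ? 0 : 2*v + 1,
// so 0 stays 0 and 31 (full 5-bit intensity) maps to 63 (full 6-bit).
u32 Expand5To6(u32 v)
{
    u32 e = (v << 1) & 0x3E; // 2*v for a 5-bit input
    if (e) e++;
    return e;
}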
@@ -838,7 +842,7 @@ void ComputeRenderer::RenderFrame()
bool wbuffer = false;
if (numYSpans > 0)
{
wbuffer = RenderPolygonRAM[0]->WBuffer;
wbuffer = gpu.GPU3D.RenderPolygonRAM[0]->WBuffer;
glUseProgram(ShaderClearIndirectWorkCount);
glDispatchCompute((numVariants+31)/32, 1, 1);
@@ -851,7 +855,7 @@ void ComputeRenderer::RenderFrame()
// bin polygons
glUseProgram(ShaderBinCombined);
glDispatchCompute(((RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH);
glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH);
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
// calculate list offsets
@@ -872,7 +876,7 @@ void ComputeRenderer::RenderFrame()
// rasterise
{
bool highLightMode = RenderDispCnt & (1<<1);
bool highLightMode = gpu.GPU3D.RenderDispCnt & (1<<1);
GLuint shadersNoTexture[] =
{
@@ -942,11 +946,11 @@ void ComputeRenderer::RenderFrame()
glBindImageTexture(0, Framebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
glBindImageTexture(1, LowResFramebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
u32 finalPassShader = 0;
if (RenderDispCnt & (1<<4))
if (gpu.GPU3D.RenderDispCnt & (1<<4))
finalPassShader |= 0x4;
if (RenderDispCnt & (1<<7))
if (gpu.GPU3D.RenderDispCnt & (1<<7))
finalPassShader |= 0x2;
if (RenderDispCnt & (1<<5))
if (gpu.GPU3D.RenderDispCnt & (1<<5))
finalPassShader |= 0x1;
glUseProgram(ShaderFinalPass[finalPassShader]);
@@ -1002,7 +1006,7 @@ void ComputeRenderer::RenderFrame()
}*/
}
void ComputeRenderer::RestartFrame()
void ComputeRenderer::RestartFrame(GPU& gpu)
{
}
@@ -1034,4 +1038,19 @@ void ComputeRenderer::PrepareCaptureFrame()
glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, nullptr);
}
void ComputeRenderer::BindOutputTexture(int buffer)
{
CurGLCompositor.BindOutputTexture(buffer);
}
void ComputeRenderer::Blit(const GPU &gpu)
{
CurGLCompositor.RenderFrame(gpu, *this);
}
void ComputeRenderer::Stop(const GPU &gpu)
{
CurGLCompositor.Stop(gpu);
}
}

View File

@@ -21,15 +21,18 @@
#include <memory>
#include "types.h"
#include "GPU3D.h"
#include "OpenGLSupport.h"
#include "GPU_OpenGL.h"
#include "GPU3D_TexcacheOpenGL.h"
#include "NonStupidBitfield.h"
namespace GPU3D
namespace melonDS
{
class ComputeRenderer : public Renderer3D
@@ -38,20 +41,25 @@ public:
static std::unique_ptr<ComputeRenderer> New();
~ComputeRenderer() override;
void Reset() override;
void Reset(GPU& gpu) override;
void SetRenderSettings(GPU::RenderSettings& settings) override;
void SetRenderSettings(int scale, bool highResolutionCoordinates);
void VCount144() override;
void VCount144(GPU& gpu) override;
void RenderFrame() override;
void RestartFrame() override;
void RenderFrame(GPU& gpu) override;
void RestartFrame(GPU& gpu) override;
u32* GetLine(int line) override;
void SetupAccelFrame() override;
void PrepareCaptureFrame() override;
void BindOutputTexture(int buffer) override;
void Blit(const GPU& gpu) override;
void Stop(const GPU& gpu) override;
private:
ComputeRenderer();
ComputeRenderer(GLCompositor&& compositor);
GLuint ShaderInterpXSpans[2];
GLuint ShaderBinCombined;
@@ -213,6 +221,8 @@ private:
int MaxWorkTiles;
bool HiresCoordinates;
GLCompositor CurGLCompositor;
void DeleteShaders();
void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to);

View File

@@ -21,7 +21,7 @@
#include <string>
namespace GPU3D
namespace melonDS
{
namespace ComputeRendererShaders

View File

@@ -1308,6 +1308,11 @@ void GLRenderer::Blit(const GPU& gpu)
CurGLCompositor.RenderFrame(gpu, *this);
}
void GLRenderer::BindOutputTexture(int buffer)
{
CurGLCompositor.BindOutputTexture(buffer);
}
u32* GLRenderer::GetLine(int line)
{
int stride = 256;

View File

@@ -45,11 +45,10 @@ public:
u32* GetLine(int line) override;
void SetupAccelFrame() override;
void PrepareCaptureFrame() override override;
void PrepareCaptureFrame() override;
void Blit(const GPU& gpu) override;
[[nodiscard]] const GLCompositor& GetCompositor() const noexcept { return CurGLCompositor; }
GLCompositor& GetCompositor() noexcept { return CurGLCompositor; }
void BindOutputTexture(int buffer) override;
static std::unique_ptr<GLRenderer> New() noexcept;
private:

View File

@@ -95,8 +95,8 @@ void SoftRenderer::EnableRenderThread()
}
}
SoftRenderer::SoftRenderer(bool threaded) noexcept
: Renderer3D(false), Threaded(threaded)
SoftRenderer::SoftRenderer() noexcept
: Renderer3D(false)
{
Sema_RenderStart = Platform::Semaphore_Create();
Sema_RenderDone = Platform::Semaphore_Create();

View File

@@ -29,7 +29,7 @@ namespace melonDS
class SoftRenderer : public Renderer3D
{
public:
SoftRenderer(bool threaded = false) noexcept;
SoftRenderer() noexcept;
~SoftRenderer() override;
void Reset(GPU& gpu) override;
@@ -504,7 +504,7 @@ private:
// threading
bool Threaded;
bool Threaded = false;
Platform::Thread* RenderThread;
std::atomic_bool RenderThreadRunning;
std::atomic_bool RenderThreadRendering;
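With the constructor flag gone, threading is now a runtime property of the software renderer; the EmuThread changes later in this diff configure it with SetThreaded(Config::Threaded3D, NDS->GPU) after construction. A hedged sketch of the new flow:

// Sketch: construct first, then toggle threading (signature as used in
// updateRenderer() further down in this diff).
auto renderer = std::make_unique<melonDS::SoftRenderer>();
renderer->SetThreaded(Config::Threaded3D != 0, NDS->GPU);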

View File

@@ -1,6 +1,6 @@
#include "GPU3D_Texcache.h"
namespace GPU3D
namespace melonDS
{
inline u16 ColorAvg(u16 color0, u16 color1)

View File

@@ -6,11 +6,12 @@
#include <assert.h>
#include <unordered_map>
#include <vector>
#define XXH_STATIC_LINKING_ONLY
#include "xxhash/xxhash.h"
namespace GPU3D
namespace melonDS
{
inline u32 TextureWidth(u32 texparam)
@@ -47,13 +48,13 @@ public:
: TexLoader(texloader) // probably better if this would be a move constructor???
{}
bool Update()
bool Update(GPU& gpu)
{
auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu);
auto texPalDirty = gpu.VRAMDirty_TexPal.DeriveState(gpu.VRAMMap_TexPal, gpu);
bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty);
bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty);
bool textureChanged = gpu.MakeVRAMFlat_TextureCoherent(textureDirty);
bool texPalChanged = gpu.MakeVRAMFlat_TexPalCoherent(texPalDirty);
if (textureChanged || texPalChanged)
{
@@ -65,8 +66,8 @@ public:
{
for (u32 i = 0; i < 2; i++)
{
u32 startBit = entry.TextureRAMStart[i] / GPU::VRAMDirtyGranularity;
u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + GPU::VRAMDirtyGranularity - 1) / GPU::VRAMDirtyGranularity) - startBit;
u32 startBit = entry.TextureRAMStart[i] / VRAMDirtyGranularity;
u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
u32 startEntry = startBit >> 6;
u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
@@ -74,7 +75,7 @@ public:
{
if (GetRangedBitMask(j, startBit, bitsCount) & textureDirty.Data[j])
{
u64 newTexHash = XXH3_64bits(&GPU::VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
u64 newTexHash = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
if (newTexHash != entry.TextureHash[i])
goto invalidate;
@@ -85,8 +86,8 @@ public:
if (texPalChanged && entry.TexPalSize > 0)
{
u32 startBit = entry.TexPalStart / GPU::VRAMDirtyGranularity;
u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + GPU::VRAMDirtyGranularity - 1) / GPU::VRAMDirtyGranularity) - startBit;
u32 startBit = entry.TexPalStart / VRAMDirtyGranularity;
u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
u32 startEntry = startBit >> 6;
u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
@@ -94,7 +95,7 @@ public:
{
if (GetRangedBitMask(j, startBit, bitsCount) & texPalDirty.Data[j])
{
u64 newPalHash = XXH3_64bits(&GPU::VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
u64 newPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
if (newPalHash != entry.TexPalHash)
goto invalidate;
}
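The rewritten Update() walks each cache entry's VRAM ranges and, wherever the dirty bitmap flags part of a range, re-hashes the flattened VRAM copy to confirm the texture actually changed, so entries survive spurious dirty bits. A minimal sketch of the validity check, assuming the bundled xxhash API:

// Sketch: an entry stays valid if its backing bytes still hash the same,
// even when the dirty bitmap flagged the range.
#define XXH_STATIC_LINKING_ONLY
#include "xxhash/xxhash.h"

bool EntryStillValid(const u8* vramFlat, u32 start, u32 size, u64 storedHash)
{
    return XXH3_64bits(&vramFlat[start], size) == storedHash;
}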
@@ -117,7 +118,7 @@ public:
return false;
}
void GetTexture(u32 texParam, u32 palBase, TexHandleT& textureHandle, u32& layer, u32*& helper)
void GetTexture(GPU& gpu, u32 texParam, u32 palBase, TexHandleT& textureHandle, u32& layer, u32*& helper)
{
// remove sampling and texcoord gen params
texParam &= ~0xC00F0000;
@@ -162,17 +163,17 @@ public:
{
entry.TextureRAMSize[0] = width*height*2;
ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, &GPU::VRAMFlat_Texture[addr]);
ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, &gpu.VRAMFlat_Texture[addr]);
}
else if (fmt == 5)
{
u8* texData = &GPU::VRAMFlat_Texture[addr];
u8* texData = &gpu.VRAMFlat_Texture[addr];
u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1);
if (addr >= 0x40000)
slot1addr += 0x10000;
u8* texAuxData = &GPU::VRAMFlat_Texture[slot1addr];
u8* texAuxData = &gpu.VRAMFlat_Texture[slot1addr];
u16* palData = (u16*)(GPU::VRAMFlat_TexPal + palBase*16);
u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palBase*16);
entry.TextureRAMSize[0] = width*height/16*4;
entry.TextureRAMStart[1] = slot1addr;
@@ -203,8 +204,8 @@ public:
entry.TexPalStart = palAddr;
entry.TexPalSize = numPalEntries*2;
u8* texData = &GPU::VRAMFlat_Texture[addr];
u16* palData = (u16*)(GPU::VRAMFlat_TexPal + palAddr);
u8* texData = &gpu.VRAMFlat_Texture[addr];
u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palAddr);
//assert(entry.TexPalStart+entry.TexPalSize <= 128*1024*1024);
@@ -223,10 +224,10 @@ public:
for (int i = 0; i < 2; i++)
{
if (entry.TextureRAMSize[i])
entry.TextureHash[i] = XXH3_64bits(&GPU::VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
entry.TextureHash[i] = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
}
if (entry.TexPalSize)
entry.TexPalHash = XXH3_64bits(&GPU::VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
entry.TexPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
auto& texArrays = TexArrays[widthLog2][heightLog2];
auto& freeTextures = FreeTextures[widthLog2][heightLog2];
@@ -234,7 +235,7 @@ public:
if (freeTextures.size() == 0)
{
texArrays.resize(texArrays.size()+1);
GLuint& array = texArrays[texArrays.size()-1];
TexHandleT& array = texArrays[texArrays.size()-1];
u32 layers = std::min<u32>((8*1024*1024) / (width*height*4), 64);
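Switching the array element type from GLuint to TexHandleT keeps the cache generic over loader backends, and the layer count budgets each array texture at roughly 8 MiB of RGBA8 data. Worked examples of the formula:

// layers = min((8 * 1024 * 1024) / (width * height * 4), 64)
//   1024 x 1024 -> 8388608 / 4194304 = 2 layers
//    256 x 256  -> 8388608 /  262144 = 32 layers
//      8 x 8    -> 8388608 /     256 = 32768, capped at 64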

View File

@@ -1,6 +1,6 @@
#include "GPU3D_TexcacheOpenGL.h"
namespace GPU3D
namespace melonDS
{
GLuint TexcacheOpenGLLoader::GenerateTexture(u32 width, u32 height, u32 layers)

View File

@@ -4,7 +4,7 @@
#include "GPU3D_Texcache.h"
#include "OpenGLSupport.h"
namespace GPU3D
namespace melonDS
{
template <typename, typename>

View File

@@ -169,7 +169,7 @@ GLCompositor& GLCompositor::operator=(GLCompositor&& other) noexcept
CompVertices = other.CompVertices;
// Clean up these resources before overwriting them
OpenGL::DeleteShaderProgram(CompShader.data());
glDeleteProgram(CompShader);
CompShader = other.CompShader;
glDeleteBuffers(1, &CompVertexBufferID);
@@ -239,7 +239,7 @@ void GLCompositor::Stop(const GPU& gpu) noexcept
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept
void GLCompositor::RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept
{
int backbuf = gpu.FrontBuffer ^ 1;
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

View File

@@ -28,6 +28,7 @@ namespace melonDS
class GPU;
struct RenderSettings;
class GLRenderer;
class Renderer3D;
class GLCompositor
{
public:
@@ -42,7 +43,7 @@ public:
[[nodiscard]] int GetScaleFactor() const noexcept { return Scale; }
void Stop(const GPU& gpu) noexcept;
void RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept;
void RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept;
void BindOutputTexture(int buf);
private:
GLCompositor(GLuint CompShader) noexcept;

View File

@@ -26,6 +26,9 @@
#include <initializer_list>
#include <algorithm>
namespace melonDS
{
inline u64 GetRangedBitMask(u32 idx, u32 startBit, u32 bitsCount)
{
u32 startEntry = startBit >> 6;
@@ -55,8 +58,6 @@ inline u64 GetRangedBitMask(u32 idx, u32 startBit, u32 bitsCount)
// like std::bitset but less stupid and optimised for
// our use case (keeping track of memory invalidations)
namespace melonDS
{
template <u32 Size>
struct NonStupidBitField
{

View File

@@ -212,12 +212,7 @@ bool CompilerShader(GLuint& id, const std::string& source, const std::string& na
Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", source.c_str());
delete[] log;
Platform::FileHandle* logf = Platform::OpenFile("shaderfail.log", Platform::FileMode::WriteText);
Platform::FileWrite(fs, len+1, 1, logf);
Platform::CloseFile(logf);
glDeleteShader(ids[0]);
glDeleteShader(ids[1]);
glDeleteShader(id);
return false;
}

View File

@@ -248,7 +248,7 @@ ConfigEntry ConfigFile[] =
{"ScreenVSync", 1, &ScreenVSync, false, false},
{"ScreenVSyncInterval", 0, &ScreenVSyncInterval, 1, false},
{"3DRenderer", 0, &_3DRenderer, GPU::renderer3D_Software, false},
{"3DRenderer", 0, &_3DRenderer, renderer3D_Software, false},
{"Threaded3D", 1, &Threaded3D, true, false},
{"GL_ScaleFactor", 0, &GL_ScaleFactor, 1, false},

View File

@@ -51,6 +51,16 @@ enum
micInputType_MAX,
};
enum
{
renderer3D_Software = 0,
#ifdef OGLRENDERER_ENABLED
renderer3D_OpenGL,
renderer3D_OpenGLCompute,
#endif
renderer3D_Max,
};
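Because the two GL entries are compiled out when OGLRENDERER_ENABLED is undefined, a 3DRenderer value saved by a GL-enabled build can land out of range in another build; the SANITIZE change at the end of this diff re-clamps it on load. A sketch of the effect:

// Sketch: clamp a loaded config value into the enum's range
// (mirrors the SANITIZE change in main() at the end of this diff).
Config::_3DRenderer = std::clamp<int>(Config::_3DRenderer, 0, renderer3D_Max);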
namespace Config
{

View File

@@ -52,10 +52,12 @@
#include "DSi_I2C.h"
#include "GPU3D_Soft.h"
#include "GPU3D_OpenGL.h"
#include "GPU3D_Compute.h"
#include "Savestate.h"
#include "ROMManager.h"
#include "EmuThread.h"
//#include "ArchiveUtil.h"
//#include "CameraManager.h"
@@ -95,8 +97,7 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent)
std::unique_ptr<NDS> EmuThread::CreateConsole(
std::unique_ptr<melonDS::NDSCart::CartCommon> &&ndscart,
std::unique_ptr<melonDS::GBACart::CartCommon>&& gbacart
) noexcept
std::unique_ptr<melonDS::GBACart::CartCommon> &&gbacart) noexcept
{
auto arm7bios = ROMManager::LoadARM7BIOS();
if (!arm7bios)
@@ -326,16 +327,7 @@ void EmuThread::run()
videoRenderer = 0;
}
if (videoRenderer == 0)
{ // If we're using the software renderer...
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>(Config::Threaded3D != 0));
}
else
{
auto glrenderer = melonDS::GLRenderer::New();
glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
NDS->GPU.SetRenderer3D(std::move(glrenderer));
}
updateRenderer();
Input::Init();
@@ -451,20 +443,10 @@ void EmuThread::run()
videoRenderer = 0;
}
videoRenderer = screenGL ? Config::_3DRenderer : 0;
printf("miau\n");
updateRenderer();
videoSettingsDirty = false;
if (videoRenderer == 0)
{ // If we're using the software renderer...
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>(Config::Threaded3D != 0));
}
else
{
auto glrenderer = melonDS::GLRenderer::New();
glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
NDS->GPU.SetRenderer3D(std::move(glrenderer));
}
}
// process input and hotkeys
@@ -750,3 +732,39 @@ bool EmuThread::emuIsActive()
{
return (RunningSomething == 1);
}
void EmuThread::updateRenderer()
{
if (videoRenderer != lastVideoRenderer)
{
printf("creating renderer %d\n", videoRenderer);
switch (videoRenderer)
{
case renderer3D_Software:
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>());
break;
case renderer3D_OpenGL:
NDS->GPU.SetRenderer3D(GLRenderer::New());
break;
case renderer3D_OpenGLCompute:
NDS->GPU.SetRenderer3D(ComputeRenderer::New());
break;
default: __builtin_unreachable();
}
}
lastVideoRenderer = videoRenderer;
switch (videoRenderer)
{
case renderer3D_Software:
static_cast<SoftRenderer&>(NDS->GPU.GetRenderer3D()).SetThreaded(Config::Threaded3D, NDS->GPU);
break;
case renderer3D_OpenGL:
static_cast<GLRenderer&>(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
break;
case renderer3D_OpenGLCompute:
static_cast<ComputeRenderer&>(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_ScaleFactor, Config::GL_HiresCoordinates);
break;
default: __builtin_unreachable();
}
}

View File

@@ -94,6 +94,8 @@ signals:
void syncVolumeLevel();
private:
void updateRenderer();
std::unique_ptr<melonDS::NDS> CreateConsole(
std::unique_ptr<melonDS::NDSCart::CartCommon>&& ndscart,
std::unique_ptr<melonDS::GBACart::CartCommon>&& gbacart
@@ -127,8 +129,7 @@ private:
int autoScreenSizing;
int videoRenderer;
bool videoSettingsDirty;
int lastVideoRenderer = -1;
};
#endif // EMUTHREAD_H

View File

@@ -709,19 +709,17 @@ void ScreenPanelGL::initOpenGL()
glContext->MakeCurrent();
OpenGL::BuildShaderProgram(kScreenVS, kScreenFS, screenShaderProgram, "ScreenShader");
GLuint pid = screenShaderProgram[2];
glBindAttribLocation(pid, 0, "vPosition");
glBindAttribLocation(pid, 1, "vTexcoord");
glBindFragDataLocation(pid, 0, "oColor");
OpenGL::CompileVertexFragmentProgram(screenShaderProgram,
kScreenVS, kScreenFS,
"ScreenShader",
{{"vPosition", 0}, {"vTexcoord", 1}},
{{"oColor", 0}});
OpenGL::LinkShaderProgram(screenShaderProgram);
glUseProgram(screenShaderProgram);
glUniform1i(glGetUniformLocation(screenShaderProgram, "ScreenTex"), 0);
glUseProgram(pid);
glUniform1i(glGetUniformLocation(pid, "ScreenTex"), 0);
screenShaderScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize");
screenShaderTransformULoc = glGetUniformLocation(pid, "uTransform");
screenShaderScreenSizeULoc = glGetUniformLocation(screenShaderProgram, "uScreenSize");
screenShaderTransformULoc = glGetUniformLocation(screenShaderProgram, "uTransform");
// to prevent bleeding between both parts of the screen
// with bilinear filtering enabled
@@ -769,21 +767,19 @@ void ScreenPanelGL::initOpenGL()
memset(zeroData, 0, sizeof(zeroData));
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 2, GL_RGBA, GL_UNSIGNED_BYTE, zeroData);
OpenGL::CompileVertexFragmentProgram(osdShader,
kScreenVS_OSD, kScreenFS_OSD,
"OSDShader",
{{"vPosition", 0}},
{{"oColor", 0}});
OpenGL::BuildShaderProgram(kScreenVS_OSD, kScreenFS_OSD, osdShader, "OSDShader");
glUseProgram(osdShader);
glUniform1i(glGetUniformLocation(osdShader, "OSDTex"), 0);
pid = osdShader[2];
glBindAttribLocation(pid, 0, "vPosition");
glBindFragDataLocation(pid, 0, "oColor");
OpenGL::LinkShaderProgram(osdShader);
glUseProgram(pid);
glUniform1i(glGetUniformLocation(pid, "OSDTex"), 0);
osdScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize");
osdPosULoc = glGetUniformLocation(pid, "uOSDPos");
osdSizeULoc = glGetUniformLocation(pid, "uOSDSize");
osdScaleFactorULoc = glGetUniformLocation(pid, "uScaleFactor");
osdScreenSizeULoc = glGetUniformLocation(osdShader, "uScreenSize");
osdPosULoc = glGetUniformLocation(osdShader, "uOSDPos");
osdSizeULoc = glGetUniformLocation(osdShader, "uOSDSize");
osdScaleFactorULoc = glGetUniformLocation(osdShader, "uScaleFactor");
const float osdvertices[6*2] =
{
@@ -818,8 +814,7 @@ void ScreenPanelGL::deinitOpenGL()
glDeleteVertexArrays(1, &screenVertexArray);
glDeleteBuffers(1, &screenVertexBuffer);
OpenGL::DeleteShaderProgram(screenShaderProgram);
glDeleteProgram(screenShaderProgram);
for (const auto& [key, tex] : osdTextures)
{
@@ -830,8 +825,7 @@ void ScreenPanelGL::deinitOpenGL()
glDeleteVertexArrays(1, &osdVertexArray);
glDeleteBuffers(1, &osdVertexBuffer);
OpenGL::DeleteShaderProgram(osdShader);
glDeleteProgram(osdShader);
glContext->DoneCurrent();
@@ -885,7 +879,7 @@ void ScreenPanelGL::drawScreenGL()
glViewport(0, 0, w, h);
glUseProgram(screenShaderProgram[2]);
glUseProgram(screenShaderProgram);
glUniform2f(screenShaderScreenSizeULoc, w / factor, h / factor);
int frontbuf = emuThread->FrontBuffer;
@@ -895,7 +889,7 @@ void ScreenPanelGL::drawScreenGL()
if (emuThread->NDS->GPU.GetRenderer3D().Accelerated)
{
// hardware-accelerated render
static_cast<GLRenderer&>(emuThread->NDS->GPU.GetRenderer3D()).GetCompositor().BindOutputTexture(frontbuf);
emuThread->NDS->GPU.GetRenderer3D().BindOutputTexture(frontbuf);
}
else
#endif
@@ -936,7 +930,7 @@ void ScreenPanelGL::drawScreenGL()
u32 y = kOSDMargin;
glUseProgram(osdShader[2]);
glUseProgram(osdShader);
glUniform2f(osdScreenSizeULoc, w, h);
glUniform1f(osdScaleFactorULoc, factor);
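Throughout this file the three-element {vertex, fragment, program} shader arrays become single linked program handles, with attribute and fragment-output locations handed to the new CompileVertexFragmentProgram helper so they are bound before linking. A hedged sketch of the pattern, signature as used above (the return value, likely a success flag, is ignored here):

// Sketch: one call compiles, binds locations, and links; the handle is a
// plain GLuint, so use sites no longer index program[2].
GLuint prog;
OpenGL::CompileVertexFragmentProgram(prog,
    kScreenVS, kScreenFS, "ScreenShader",
    {{"vPosition", 0}, {"vTexcoord", 1}}, // attributes, bound pre-link
    {{"oColor", 0}});                     // fragment outputs, bound pre-link
glUseProgram(prog);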

View File

@@ -172,7 +172,7 @@ private:
GLuint screenVertexBuffer, screenVertexArray;
GLuint screenTexture;
GLuint screenShaderProgram[3];
GLuint screenShaderProgram;
GLuint screenShaderTransformULoc, screenShaderScreenSizeULoc;
QMutex screenSettingsLock;
@@ -181,7 +181,7 @@ private:
int lastScreenWidth = -1, lastScreenHeight = -1;
GLuint osdShader[3];
GLuint osdShader;
GLint osdScreenSizeULoc, osdPosULoc, osdSizeULoc;
GLfloat osdScaleFactorULoc;
GLuint osdVertexArray;

View File

@@ -31,19 +31,19 @@
inline bool UsesGL()
{
return (Config::ScreenUseGL != 0) || (Config::_3DRenderer != GPU::renderer3D_Software);
return (Config::ScreenUseGL != 0) || (Config::_3DRenderer != renderer3D_Software);
}
VideoSettingsDialog* VideoSettingsDialog::currentDlg = nullptr;
void VideoSettingsDialog::setEnabled()
{
bool softwareRenderer = Config::_3DRenderer == GPU::renderer3D_Software;
bool softwareRenderer = Config::_3DRenderer == renderer3D_Software;
ui->cbGLDisplay->setEnabled(softwareRenderer);
ui->cbSoftwareThreaded->setEnabled(softwareRenderer);
ui->cbxGLResolution->setEnabled(!softwareRenderer);
ui->cbBetterPolygons->setEnabled(Config::_3DRenderer == GPU::renderer3D_OpenGL);
ui->cbxComputeHiResCoords->setEnabled(Config::_3DRenderer == GPU::renderer3D_OpenGLCompute);
ui->cbBetterPolygons->setEnabled(Config::_3DRenderer == renderer3D_OpenGL);
ui->cbxComputeHiResCoords->setEnabled(Config::_3DRenderer == renderer3D_OpenGLCompute);
}
VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::VideoSettingsDialog)
@@ -61,9 +61,9 @@ VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(
oldHiresCoordinates = Config::GL_HiresCoordinates;
grp3DRenderer = new QButtonGroup(this);
grp3DRenderer->addButton(ui->rb3DSoftware, GPU::renderer3D_Software);
grp3DRenderer->addButton(ui->rb3DOpenGL, GPU::renderer3D_OpenGL);
grp3DRenderer->addButton(ui->rb3DCompute, GPU::renderer3D_OpenGLCompute);
grp3DRenderer->addButton(ui->rb3DSoftware, renderer3D_Software);
grp3DRenderer->addButton(ui->rb3DOpenGL, renderer3D_OpenGL);
grp3DRenderer->addButton(ui->rb3DCompute, renderer3D_OpenGLCompute);
#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
connect(grp3DRenderer, SIGNAL(buttonClicked(int)), this, SLOT(onChange3DRenderer(int)));
#else

View File

@@ -2048,6 +2048,7 @@ void MainWindow::onUpdateVideoSettings(bool glchange)
connect(emuThread, SIGNAL(windowUpdate()), panel, SLOT(repaint()));
}
printf("update video settings\n");
videoSettingsDirty = true;
if (glchange)

View File

@@ -175,10 +175,6 @@ bool camStarted[2];
//extern int AspectRatiosNum;
static bool FileExtensionInList(const QString& filename, const QStringList& extensions, Qt::CaseSensitivity cs = Qt::CaseInsensitive)
{
return std::any_of(extensions.cbegin(), extensions.cend(), [&](const auto& ext) {
@@ -339,10 +335,10 @@ int main(int argc, char** argv)
if (!Config::Load()) QMessageBox::critical(NULL, "melonDS", "Unable to write to config.\nPlease check the write permissions of the folder you placed melonDS in.");
#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); }
#define SANITIZE(var, min, max) { var = std::clamp<int>(var, min, max); }
SANITIZE(Config::ConsoleType, 0, 1);
#ifdef OGLRENDERER_ENABLED
SANITIZE(Config::_3DRenderer, 0, 1); // 0 is the software renderer, 1 is the OpenGL renderer
SANITIZE(Config::_3DRenderer, 0, renderer3D_Max);
#else
SANITIZE(Config::_3DRenderer, 0, 0);
#endif
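The explicit template argument in the reworked SANITIZE matters because std::clamp deduces one type T from all three arguments; mixing the int config variable with an unscoped-enum bound such as renderer3D_Max would make deduction fail, while std::clamp<int> forces every argument to convert. For example:

// std::clamp(Config::_3DRenderer, 0, renderer3D_Max); // deduction fails:
//                                                     // int vs enum type
Config::_3DRenderer = std::clamp<int>(Config::_3DRenderer, 0, renderer3D_Max); // ok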