diff --git a/src/GPU3D.h b/src/GPU3D.h index 802e58b7..cc651cc6 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -352,6 +352,7 @@ public: virtual void SetupAccelFrame() {} virtual void PrepareCaptureFrame() {} + virtual void BindOutputTexture(int buffer) {} protected: Renderer3D(bool Accelerated); @@ -362,11 +363,4 @@ extern std::unique_ptr CurrentRenderer; } -#include "GPU3D_Soft.h" - -#ifdef OGLRENDERER_ENABLED -#include "GPU3D_OpenGL.h" -#include "GPU3D_Compute.h" -#endif - #endif diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 8d500a44..6424498b 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -24,11 +24,11 @@ #include "GPU3D_Compute_shaders.h" -namespace GPU3D +namespace melonDS { -ComputeRenderer::ComputeRenderer() - : Renderer3D(true), Texcache(TexcacheOpenGLLoader()) +ComputeRenderer::ComputeRenderer(GLCompositor&& compositor) + : Renderer3D(true), Texcache(TexcacheOpenGLLoader()), CurGLCompositor(std::move(compositor)) {} bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, const std::initializer_list& defines) @@ -64,7 +64,11 @@ void blah(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,con std::unique_ptr ComputeRenderer::New() { - std::unique_ptr result = std::unique_ptr(new ComputeRenderer()); + std::optional compositor = GLCompositor::New(); + if (!compositor) + return nullptr; + + std::unique_ptr result = std::unique_ptr(new ComputeRenderer(std::move(*compositor))); //glDebugMessageCallback(blah, NULL); //glEnable(GL_DEBUG_OUTPUT); @@ -174,26 +178,28 @@ void ComputeRenderer::DeleteShaders() glDeleteProgram(program); } -void ComputeRenderer::Reset() +void ComputeRenderer::Reset(GPU& gpu) { Texcache.Reset(); } -void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings) +void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates) { + CurGLCompositor.SetScaleFactor(scale); + if (ScaleFactor != -1) { DeleteShaders(); } - ScaleFactor = 
settings.GL_ScaleFactor; + ScaleFactor = scale; ScreenWidth = 256 * ScaleFactor; ScreenHeight = 192 * ScaleFactor; TilesPerLine = ScreenWidth/TileSize; TileLines = ScreenHeight/TileSize; - HiresCoordinates = settings.GL_HiresCoordinates; + HiresCoordinates = highResolutionCoordinates; MaxWorkTiles = TilesPerLine*TileLines*8; @@ -271,7 +277,7 @@ void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings) CompileShader(ShaderFinalPass[7], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking", "Fog"}); } -void ComputeRenderer::VCount144() +void ComputeRenderer::VCount144(GPU& gpu) { } @@ -503,11 +509,9 @@ struct Variant => 20 Shader + 1x Shadow Mask */ -void ComputeRenderer::RenderFrame() +void ComputeRenderer::RenderFrame(GPU& gpu) { - //printf("render frame\n"); - - if (!Texcache.Update() && RenderFrameIdentical) + if (!Texcache.Update(gpu) && gpu.GPU3D.RenderFrameIdentical) { return; } @@ -530,11 +534,11 @@ void ComputeRenderer::RenderFrame() u32 numVariants = 0, prevVariant, prevTexLayer; Variant variants[MaxVariants]; - bool enableTextureMaps = RenderDispCnt & (1<<0); + bool enableTextureMaps = gpu.GPU3D.RenderDispCnt & (1<<0); - for (int i = 0; i < RenderNumPolygons; i++) + for (int i = 0; i < gpu.GPU3D.RenderNumPolygons; i++) { - Polygon* polygon = RenderPolygonRAM[i]; + Polygon* polygon = gpu.GPU3D.RenderPolygonRAM[i]; u32 nverts = polygon->NumVertices; u32 vtop = polygon->VTop, vbot = polygon->VBottom; @@ -550,7 +554,7 @@ void ComputeRenderer::RenderFrame() { // if the whole texture attribute matches // the texture layer will also match - Polygon* prevPolygon = RenderPolygonRAM[i - 1]; + Polygon* prevPolygon = gpu.GPU3D.RenderPolygonRAM[i - 1]; foundVariant = prevPolygon->TexParam == polygon->TexParam && prevPolygon->TexPalette == polygon->TexPalette && (prevPolygon->Attr & 0x30) == (polygon->Attr & 0x30) @@ -567,7 +571,7 @@ void ComputeRenderer::RenderFrame() // we always need to look up the texture to get the layer of the 
array texture if (enableTextureMaps && (polygon->TexParam >> 26) & 0x7) { - Texcache.GetTexture(polygon->TexParam, polygon->TexPalette, variant.Texture, prevTexLayer, textureLastVariant); + Texcache.GetTexture(gpu, polygon->TexParam, polygon->TexPalette, variant.Texture, prevTexLayer, textureLastVariant); bool wrapS = (polygon->TexParam >> 16) & 1; bool wrapT = (polygon->TexParam >> 17) & 1; bool mirrorS = (polygon->TexParam >> 18) & 1; @@ -760,7 +764,7 @@ void ComputeRenderer::RenderFrame() glBufferSubData(GL_TEXTURE_BUFFER, 0, numSetupIndices*4*2, YSpanIndices.data()); glBindBuffer(GL_SHADER_STORAGE_BUFFER, RenderPolygonMemory); - glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, RenderNumPolygons*sizeof(RenderPolygon), RenderPolygons); + glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, gpu.GPU3D.RenderNumPolygons*sizeof(RenderPolygon), RenderPolygons); // we haven't accessed image data yet, so we don't need to invalidate anything } @@ -777,22 +781,22 @@ void ComputeRenderer::RenderFrame() glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, WorkDescMemory); MetaUniform meta; - meta.DispCnt = RenderDispCnt; - meta.NumPolygons = RenderNumPolygons; + meta.DispCnt = gpu.GPU3D.RenderDispCnt; + meta.NumPolygons = gpu.GPU3D.RenderNumPolygons; meta.NumVariants = numVariants; - meta.AlphaRef = RenderAlphaRef; + meta.AlphaRef = gpu.GPU3D.RenderAlphaRef; { - u32 r = (RenderClearAttr1 << 1) & 0x3E; if (r) r++; - u32 g = (RenderClearAttr1 >> 4) & 0x3E; if (g) g++; - u32 b = (RenderClearAttr1 >> 9) & 0x3E; if (b) b++; - u32 a = (RenderClearAttr1 >> 16) & 0x1F; + u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; + u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; + u32 b = (gpu.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; + u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F; meta.ClearColor = r | (g << 8) | (b << 16) | (a << 24); - meta.ClearDepth = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; - meta.ClearAttr = RenderClearAttr1 & 0x3F008000; + meta.ClearDepth = 
((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + meta.ClearAttr = gpu.GPU3D.RenderClearAttr1 & 0x3F008000; } for (u32 i = 0; i < 32; i++) { - u32 color = RenderToonTable[i]; + u32 color = gpu.GPU3D.RenderToonTable[i]; u32 r = (color << 1) & 0x3E; u32 g = (color >> 4) & 0x3E; u32 b = (color >> 9) & 0x3E; @@ -804,11 +808,11 @@ void ComputeRenderer::RenderFrame() } for (u32 i = 0; i < 34; i++) { - meta.ToonTable[i*4+1] = RenderFogDensityTable[i]; + meta.ToonTable[i*4+1] = gpu.GPU3D.RenderFogDensityTable[i]; } for (u32 i = 0; i < 8; i++) { - u32 color = RenderEdgeTable[i]; + u32 color = gpu.GPU3D.RenderEdgeTable[i]; u32 r = (color << 1) & 0x3E; u32 g = (color >> 4) & 0x3E; u32 b = (color >> 9) & 0x3E; @@ -818,13 +822,13 @@ void ComputeRenderer::RenderFrame() meta.ToonTable[i*4+2] = r | (g << 8) | (b << 16); } - meta.FogOffset = RenderFogOffset; - meta.FogShift = RenderFogShift; + meta.FogOffset = gpu.GPU3D.RenderFogOffset; + meta.FogShift = gpu.GPU3D.RenderFogShift; { - u32 fogR = (RenderFogColor << 1) & 0x3E; if (fogR) fogR++; - u32 fogG = (RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; - u32 fogB = (RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; - u32 fogA = (RenderFogColor >> 16) & 0x1F; + u32 fogR = (gpu.GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; + u32 fogG = (gpu.GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; + u32 fogB = (gpu.GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; + u32 fogA = (gpu.GPU3D.RenderFogColor >> 16) & 0x1F; meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24); } @@ -838,7 +842,7 @@ void ComputeRenderer::RenderFrame() bool wbuffer = false; if (numYSpans > 0) { - wbuffer = RenderPolygonRAM[0]->WBuffer; + wbuffer = gpu.GPU3D.RenderPolygonRAM[0]->WBuffer; glUseProgram(ShaderClearIndirectWorkCount); glDispatchCompute((numVariants+31)/32, 1, 1); @@ -851,7 +855,7 @@ void ComputeRenderer::RenderFrame() // bin polygons glUseProgram(ShaderBinCombined); - glDispatchCompute(((RenderNumPolygons + 31) / 32), 
ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); + glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); // calculate list offsets @@ -872,7 +876,7 @@ void ComputeRenderer::RenderFrame() // rasterise { - bool highLightMode = RenderDispCnt & (1<<1); + bool highLightMode = gpu.GPU3D.RenderDispCnt & (1<<1); GLuint shadersNoTexture[] = { @@ -942,11 +946,11 @@ void ComputeRenderer::RenderFrame() glBindImageTexture(0, Framebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); glBindImageTexture(1, LowResFramebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); u32 finalPassShader = 0; - if (RenderDispCnt & (1<<4)) + if (gpu.GPU3D.RenderDispCnt & (1<<4)) finalPassShader |= 0x4; - if (RenderDispCnt & (1<<7)) + if (gpu.GPU3D.RenderDispCnt & (1<<7)) finalPassShader |= 0x2; - if (RenderDispCnt & (1<<5)) + if (gpu.GPU3D.RenderDispCnt & (1<<5)) finalPassShader |= 0x1; glUseProgram(ShaderFinalPass[finalPassShader]); @@ -1002,7 +1006,7 @@ void ComputeRenderer::RenderFrame() }*/ } -void ComputeRenderer::RestartFrame() +void ComputeRenderer::RestartFrame(GPU& gpu) { } @@ -1034,4 +1038,19 @@ void ComputeRenderer::PrepareCaptureFrame() glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, nullptr); } +void ComputeRenderer::BindOutputTexture(int buffer) +{ + CurGLCompositor.BindOutputTexture(buffer); +} + +void ComputeRenderer::Blit(const GPU &gpu) +{ + CurGLCompositor.RenderFrame(gpu, *this); +} + +void ComputeRenderer::Stop(const GPU &gpu) +{ + CurGLCompositor.Stop(gpu); +} + } \ No newline at end of file diff --git a/src/GPU3D_Compute.h b/src/GPU3D_Compute.h index b36bc18f..d37b6ef4 100644 --- a/src/GPU3D_Compute.h +++ b/src/GPU3D_Compute.h @@ -21,15 +21,18 @@ #include +#include "types.h" + #include "GPU3D.h" #include "OpenGLSupport.h" +#include "GPU_OpenGL.h" #include "GPU3D_TexcacheOpenGL.h" #include "NonStupidBitfield.h" -namespace GPU3D +namespace melonDS { class 
ComputeRenderer : public Renderer3D @@ -38,20 +41,25 @@ public: static std::unique_ptr New(); ~ComputeRenderer() override; - void Reset() override; + void Reset(GPU& gpu) override; - void SetRenderSettings(GPU::RenderSettings& settings) override; + void SetRenderSettings(int scale, bool highResolutionCoordinates); - void VCount144() override; + void VCount144(GPU& gpu) override; - void RenderFrame() override; - void RestartFrame() override; + void RenderFrame(GPU& gpu) override; + void RestartFrame(GPU& gpu) override; u32* GetLine(int line) override; void SetupAccelFrame() override; void PrepareCaptureFrame() override; + + void BindOutputTexture(int buffer) override; + + void Blit(const GPU& gpu) override; + void Stop(const GPU& gpu) override; private: - ComputeRenderer(); + ComputeRenderer(GLCompositor&& compositor); GLuint ShaderInterpXSpans[2]; GLuint ShaderBinCombined; @@ -213,6 +221,8 @@ private: int MaxWorkTiles; bool HiresCoordinates; + GLCompositor CurGLCompositor; + void DeleteShaders(); void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to); diff --git a/src/GPU3D_Compute_shaders.h b/src/GPU3D_Compute_shaders.h index 39858a4a..d365cf31 100644 --- a/src/GPU3D_Compute_shaders.h +++ b/src/GPU3D_Compute_shaders.h @@ -21,7 +21,7 @@ #include -namespace GPU3D +namespace melonDS { namespace ComputeRendererShaders diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 122b82ea..9088f078 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -1308,6 +1308,11 @@ void GLRenderer::Blit(const GPU& gpu) CurGLCompositor.RenderFrame(gpu, *this); } +void GLRenderer::BindOutputTexture(int buffer) +{ + CurGLCompositor.BindOutputTexture(buffer); +} + u32* GLRenderer::GetLine(int line) { int stride = 256; diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h index cc9956f7..dcab6e87 100644 --- a/src/GPU3D_OpenGL.h +++ b/src/GPU3D_OpenGL.h @@ -45,11 +45,10 @@ public: u32* GetLine(int line) override; void SetupAccelFrame() override; - void 
PrepareCaptureFrame() override override; + void PrepareCaptureFrame() override; void Blit(const GPU& gpu) override; - [[nodiscard]] const GLCompositor& GetCompositor() const noexcept { return CurGLCompositor; } - GLCompositor& GetCompositor() noexcept { return CurGLCompositor; } + void BindOutputTexture(int buffer) override; static std::unique_ptr New() noexcept; private: diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 74027d5b..a8da14cd 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -95,8 +95,8 @@ void SoftRenderer::EnableRenderThread() } } -SoftRenderer::SoftRenderer(bool threaded) noexcept - : Renderer3D(false), Threaded(threaded) +SoftRenderer::SoftRenderer() noexcept + : Renderer3D(false) { Sema_RenderStart = Platform::Semaphore_Create(); Sema_RenderDone = Platform::Semaphore_Create(); diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 9cfdf9ad..45b2c539 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -29,7 +29,7 @@ namespace melonDS class SoftRenderer : public Renderer3D { public: - SoftRenderer(bool threaded = false) noexcept; + SoftRenderer() noexcept; ~SoftRenderer() override; void Reset(GPU& gpu) override; @@ -504,7 +504,7 @@ private: // threading - bool Threaded; + bool Threaded = false; Platform::Thread* RenderThread; std::atomic_bool RenderThreadRunning; std::atomic_bool RenderThreadRendering; diff --git a/src/GPU3D_Texcache.cpp b/src/GPU3D_Texcache.cpp index 1d409c23..196009e6 100644 --- a/src/GPU3D_Texcache.cpp +++ b/src/GPU3D_Texcache.cpp @@ -1,6 +1,6 @@ #include "GPU3D_Texcache.h" -namespace GPU3D +namespace melonDS { inline u16 ColorAvg(u16 color0, u16 color1) diff --git a/src/GPU3D_Texcache.h b/src/GPU3D_Texcache.h index 73d70cba..214c6254 100644 --- a/src/GPU3D_Texcache.h +++ b/src/GPU3D_Texcache.h @@ -6,11 +6,12 @@ #include #include +#include #define XXH_STATIC_LINKING_ONLY #include "xxhash/xxhash.h" -namespace GPU3D +namespace melonDS { inline u32 TextureWidth(u32 texparam) @@ -47,13 +48,13 @@ public: : 
TexLoader(texloader) // probably better if this would be a move constructor??? {} - bool Update() + bool Update(GPU& gpu) { - auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); - auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); + auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu); + auto texPalDirty = gpu.VRAMDirty_TexPal.DeriveState(gpu.VRAMMap_TexPal, gpu); - bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty); - bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty); + bool textureChanged = gpu.MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = gpu.MakeVRAMFlat_TexPalCoherent(texPalDirty); if (textureChanged || texPalChanged) { @@ -65,8 +66,8 @@ public: { for (u32 i = 0; i < 2; i++) { - u32 startBit = entry.TextureRAMStart[i] / GPU::VRAMDirtyGranularity; - u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + GPU::VRAMDirtyGranularity - 1) / GPU::VRAMDirtyGranularity) - startBit; + u32 startBit = entry.TextureRAMStart[i] / VRAMDirtyGranularity; + u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit; u32 startEntry = startBit >> 6; u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry; @@ -74,7 +75,7 @@ public: { if (GetRangedBitMask(j, startBit, bitsCount) & textureDirty.Data[j]) { - u64 newTexHash = XXH3_64bits(&GPU::VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]); + u64 newTexHash = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]); if (newTexHash != entry.TextureHash[i]) goto invalidate; @@ -85,8 +86,8 @@ public: if (texPalChanged && entry.TexPalSize > 0) { - u32 startBit = entry.TexPalStart / GPU::VRAMDirtyGranularity; - u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + GPU::VRAMDirtyGranularity - 1) / GPU::VRAMDirtyGranularity) - startBit; + u32 startBit = 
entry.TexPalStart / VRAMDirtyGranularity; + u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit; u32 startEntry = startBit >> 6; u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry; @@ -94,7 +95,7 @@ public: { if (GetRangedBitMask(j, startBit, bitsCount) & texPalDirty.Data[j]) { - u64 newPalHash = XXH3_64bits(&GPU::VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize); + u64 newPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize); if (newPalHash != entry.TexPalHash) goto invalidate; } @@ -117,7 +118,7 @@ public: return false; } - void GetTexture(u32 texParam, u32 palBase, TexHandleT& textureHandle, u32& layer, u32*& helper) + void GetTexture(GPU& gpu, u32 texParam, u32 palBase, TexHandleT& textureHandle, u32& layer, u32*& helper) { // remove sampling and texcoord gen params texParam &= ~0xC00F0000; @@ -162,17 +163,17 @@ public: { entry.TextureRAMSize[0] = width*height*2; - ConvertBitmapTexture(width, height, DecodingBuffer, &GPU::VRAMFlat_Texture[addr]); + ConvertBitmapTexture(width, height, DecodingBuffer, &gpu.VRAMFlat_Texture[addr]); } else if (fmt == 5) { - u8* texData = &GPU::VRAMFlat_Texture[addr]; + u8* texData = &gpu.VRAMFlat_Texture[addr]; u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1); if (addr >= 0x40000) slot1addr += 0x10000; - u8* texAuxData = &GPU::VRAMFlat_Texture[slot1addr]; + u8* texAuxData = &gpu.VRAMFlat_Texture[slot1addr]; - u16* palData = (u16*)(GPU::VRAMFlat_TexPal + palBase*16); + u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palBase*16); entry.TextureRAMSize[0] = width*height/16*4; entry.TextureRAMStart[1] = slot1addr; @@ -203,8 +204,8 @@ public: entry.TexPalStart = palAddr; entry.TexPalSize = numPalEntries*2; - u8* texData = &GPU::VRAMFlat_Texture[addr]; - u16* palData = (u16*)(GPU::VRAMFlat_TexPal + palAddr); + u8* texData = &gpu.VRAMFlat_Texture[addr]; + u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palAddr); 
//assert(entry.TexPalStart+entry.TexPalSize <= 128*1024*1024); @@ -223,10 +224,10 @@ public: for (int i = 0; i < 2; i++) { if (entry.TextureRAMSize[i]) - entry.TextureHash[i] = XXH3_64bits(&GPU::VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]); + entry.TextureHash[i] = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]); } if (entry.TexPalSize) - entry.TexPalHash = XXH3_64bits(&GPU::VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize); + entry.TexPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize); auto& texArrays = TexArrays[widthLog2][heightLog2]; auto& freeTextures = FreeTextures[widthLog2][heightLog2]; @@ -234,7 +235,7 @@ public: if (freeTextures.size() == 0) { texArrays.resize(texArrays.size()+1); - GLuint& array = texArrays[texArrays.size()-1]; + TexHandleT& array = texArrays[texArrays.size()-1]; u32 layers = std::min((8*1024*1024) / (width*height*4), 64); diff --git a/src/GPU3D_TexcacheOpenGL.cpp b/src/GPU3D_TexcacheOpenGL.cpp index 04732371..95ca8cdc 100644 --- a/src/GPU3D_TexcacheOpenGL.cpp +++ b/src/GPU3D_TexcacheOpenGL.cpp @@ -1,6 +1,6 @@ #include "GPU3D_TexcacheOpenGL.h" -namespace GPU3D +namespace melonDS { GLuint TexcacheOpenGLLoader::GenerateTexture(u32 width, u32 height, u32 layers) diff --git a/src/GPU3D_TexcacheOpenGL.h b/src/GPU3D_TexcacheOpenGL.h index d61ae243..a8cfa576 100644 --- a/src/GPU3D_TexcacheOpenGL.h +++ b/src/GPU3D_TexcacheOpenGL.h @@ -4,7 +4,7 @@ #include "GPU3D_Texcache.h" #include "OpenGLSupport.h" -namespace GPU3D +namespace melonDS { template diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 6a6079ed..6084405b 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -169,7 +169,7 @@ GLCompositor& GLCompositor::operator=(GLCompositor&& other) noexcept CompVertices = other.CompVertices; // Clean up these resources before overwriting them - OpenGL::DeleteShaderProgram(CompShader.data()); + glDeleteProgram(CompShader); CompShader = 
other.CompShader; glDeleteBuffers(1, &CompVertexBufferID); @@ -239,7 +239,7 @@ void GLCompositor::Stop(const GPU& gpu) noexcept glBindFramebuffer(GL_FRAMEBUFFER, 0); } -void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept +void GLCompositor::RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept { int backbuf = gpu.FrontBuffer ^ 1; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); diff --git a/src/GPU_OpenGL.h b/src/GPU_OpenGL.h index d8472f3c..e9f4b173 100644 --- a/src/GPU_OpenGL.h +++ b/src/GPU_OpenGL.h @@ -28,6 +28,7 @@ namespace melonDS class GPU; struct RenderSettings; class GLRenderer; +class Renderer3D; class GLCompositor { public: @@ -42,7 +43,7 @@ public: [[nodiscard]] int GetScaleFactor() const noexcept { return Scale; } void Stop(const GPU& gpu) noexcept; - void RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept; + void RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept; void BindOutputTexture(int buf); private: GLCompositor(GLuint CompShader) noexcept; diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h index 67f999e1..a3cc4b2e 100644 --- a/src/NonStupidBitfield.h +++ b/src/NonStupidBitfield.h @@ -26,6 +26,9 @@ #include #include +namespace melonDS +{ + inline u64 GetRangedBitMask(u32 idx, u32 startBit, u32 bitsCount) { u32 startEntry = startBit >> 6; @@ -55,8 +58,6 @@ inline u64 GetRangedBitMask(u32 idx, u32 startBit, u32 bitsCount) // like std::bitset but less stupid and optimised for // our use case (keeping track of memory invalidations) -namespace melonDS -{ template struct NonStupidBitField { diff --git a/src/OpenGLSupport.cpp b/src/OpenGLSupport.cpp index d18fd730..a7d000ce 100644 --- a/src/OpenGLSupport.cpp +++ b/src/OpenGLSupport.cpp @@ -212,12 +212,7 @@ bool CompilerShader(GLuint& id, const std::string& source, const std::string& na Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", source.c_str()); delete[] log; - Platform::FileHandle* logf = Platform::OpenFile("shaderfail.log", 
Platform::FileMode::WriteText); - Platform::FileWrite(fs, len+1, 1, logf); - Platform::CloseFile(logf); - - glDeleteShader(ids[0]); - glDeleteShader(ids[1]); + glDeleteShader(id); return false; } diff --git a/src/frontend/qt_sdl/Config.cpp b/src/frontend/qt_sdl/Config.cpp index 0d54b7fb..d6d01825 100644 --- a/src/frontend/qt_sdl/Config.cpp +++ b/src/frontend/qt_sdl/Config.cpp @@ -248,7 +248,7 @@ ConfigEntry ConfigFile[] = {"ScreenVSync", 1, &ScreenVSync, false, false}, {"ScreenVSyncInterval", 0, &ScreenVSyncInterval, 1, false}, - {"3DRenderer", 0, &_3DRenderer, GPU::renderer3D_Software, false}, + {"3DRenderer", 0, &_3DRenderer, renderer3D_Software, false}, {"Threaded3D", 1, &Threaded3D, true, false}, {"GL_ScaleFactor", 0, &GL_ScaleFactor, 1, false}, diff --git a/src/frontend/qt_sdl/Config.h b/src/frontend/qt_sdl/Config.h index 1c30b627..38a1c34c 100644 --- a/src/frontend/qt_sdl/Config.h +++ b/src/frontend/qt_sdl/Config.h @@ -51,6 +51,16 @@ enum micInputType_MAX, }; +enum +{ + renderer3D_Software = 0, +#ifdef OGLRENDERER_ENABLED + renderer3D_OpenGL, + renderer3D_OpenGLCompute, +#endif + renderer3D_Max, +}; + namespace Config { diff --git a/src/frontend/qt_sdl/EmuThread.cpp b/src/frontend/qt_sdl/EmuThread.cpp index d16aead4..c62902b7 100644 --- a/src/frontend/qt_sdl/EmuThread.cpp +++ b/src/frontend/qt_sdl/EmuThread.cpp @@ -52,10 +52,12 @@ #include "DSi_I2C.h" #include "GPU3D_Soft.h" #include "GPU3D_OpenGL.h" +#include "GPU3D_Compute.h" #include "Savestate.h" #include "ROMManager.h" +#include "EmuThread.h" //#include "ArchiveUtil.h" //#include "CameraManager.h" @@ -94,9 +96,8 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent) } std::unique_ptr EmuThread::CreateConsole( - std::unique_ptr&& ndscart, - std::unique_ptr&& gbacart -) noexcept + std::unique_ptr &&ndscart, + std::unique_ptr &&gbacart) noexcept { auto arm7bios = ROMManager::LoadARM7BIOS(); if (!arm7bios) @@ -326,16 +327,7 @@ void EmuThread::run() videoRenderer = 0; } - if (videoRenderer == 0) - { // If 
we're using the software renderer... - NDS->GPU.SetRenderer3D(std::make_unique(Config::Threaded3D != 0)); - } - else - { - auto glrenderer = melonDS::GLRenderer::New(); - glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor); - NDS->GPU.SetRenderer3D(std::move(glrenderer)); - } + updateRenderer(); Input::Init(); @@ -451,20 +443,10 @@ void EmuThread::run() videoRenderer = 0; } - videoRenderer = screenGL ? Config::_3DRenderer : 0; + printf("miau\n"); + updateRenderer(); videoSettingsDirty = false; - - if (videoRenderer == 0) - { // If we're using the software renderer... - NDS->GPU.SetRenderer3D(std::make_unique(Config::Threaded3D != 0)); - } - else - { - auto glrenderer = melonDS::GLRenderer::New(); - glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor); - NDS->GPU.SetRenderer3D(std::move(glrenderer)); - } } // process input and hotkeys @@ -750,3 +732,39 @@ bool EmuThread::emuIsActive() { return (RunningSomething == 1); } + +void EmuThread::updateRenderer() +{ + if (videoRenderer != lastVideoRenderer) + { + printf("creating renderer %d\n", videoRenderer); + switch (videoRenderer) + { + case renderer3D_Software: + NDS->GPU.SetRenderer3D(std::make_unique()); + break; + case renderer3D_OpenGL: + NDS->GPU.SetRenderer3D(GLRenderer::New()); + break; + case renderer3D_OpenGLCompute: + NDS->GPU.SetRenderer3D(ComputeRenderer::New()); + break; + default: __builtin_unreachable(); + } + } + lastVideoRenderer = videoRenderer; + + switch (videoRenderer) + { + case renderer3D_Software: + static_cast(NDS->GPU.GetRenderer3D()).SetThreaded(Config::Threaded3D, NDS->GPU); + break; + case renderer3D_OpenGL: + static_cast(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor); + break; + case renderer3D_OpenGLCompute: + static_cast(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_ScaleFactor, Config::GL_HiresCoordinates); + break; + default: __builtin_unreachable(); + } +} diff --git 
a/src/frontend/qt_sdl/EmuThread.h b/src/frontend/qt_sdl/EmuThread.h index 4950ebbf..5246c1de 100644 --- a/src/frontend/qt_sdl/EmuThread.h +++ b/src/frontend/qt_sdl/EmuThread.h @@ -94,6 +94,8 @@ signals: void syncVolumeLevel(); private: + void updateRenderer(); + std::unique_ptr CreateConsole( std::unique_ptr&& ndscart, std::unique_ptr&& gbacart @@ -127,8 +129,7 @@ private: int autoScreenSizing; - int videoRenderer; - bool videoSettingsDirty; + int lastVideoRenderer = -1; }; #endif // EMUTHREAD_H diff --git a/src/frontend/qt_sdl/Screen.cpp b/src/frontend/qt_sdl/Screen.cpp index 73236504..9174d3dd 100644 --- a/src/frontend/qt_sdl/Screen.cpp +++ b/src/frontend/qt_sdl/Screen.cpp @@ -709,19 +709,17 @@ void ScreenPanelGL::initOpenGL() glContext->MakeCurrent(); - OpenGL::BuildShaderProgram(kScreenVS, kScreenFS, screenShaderProgram, "ScreenShader"); - GLuint pid = screenShaderProgram[2]; - glBindAttribLocation(pid, 0, "vPosition"); - glBindAttribLocation(pid, 1, "vTexcoord"); - glBindFragDataLocation(pid, 0, "oColor"); + OpenGL::CompileVertexFragmentProgram(screenShaderProgram, + kScreenVS, kScreenFS, + "ScreenShader", + {{"vPosition", 0}, {"vTexcoord", 1}}, + {{"oColor", 0}}); - OpenGL::LinkShaderProgram(screenShaderProgram); + glUseProgram(screenShaderProgram); + glUniform1i(glGetUniformLocation(screenShaderProgram, "ScreenTex"), 0); - glUseProgram(pid); - glUniform1i(glGetUniformLocation(pid, "ScreenTex"), 0); - - screenShaderScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize"); - screenShaderTransformULoc = glGetUniformLocation(pid, "uTransform"); + screenShaderScreenSizeULoc = glGetUniformLocation(screenShaderProgram, "uScreenSize"); + screenShaderTransformULoc = glGetUniformLocation(screenShaderProgram, "uTransform"); // to prevent bleeding between both parts of the screen // with bilinear filtering enabled @@ -769,21 +767,19 @@ void ScreenPanelGL::initOpenGL() memset(zeroData, 0, sizeof(zeroData)); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 2, GL_RGBA, 
GL_UNSIGNED_BYTE, zeroData); + OpenGL::CompileVertexFragmentProgram(osdShader, + kScreenVS_OSD, kScreenFS_OSD, + "OSDShader", + {{"vPosition", 0}}, + {{"oColor", 0}}); - OpenGL::BuildShaderProgram(kScreenVS_OSD, kScreenFS_OSD, osdShader, "OSDShader"); + glUseProgram(osdShader); + glUniform1i(glGetUniformLocation(osdShader, "OSDTex"), 0); - pid = osdShader[2]; - glBindAttribLocation(pid, 0, "vPosition"); - glBindFragDataLocation(pid, 0, "oColor"); - - OpenGL::LinkShaderProgram(osdShader); - glUseProgram(pid); - glUniform1i(glGetUniformLocation(pid, "OSDTex"), 0); - - osdScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize"); - osdPosULoc = glGetUniformLocation(pid, "uOSDPos"); - osdSizeULoc = glGetUniformLocation(pid, "uOSDSize"); - osdScaleFactorULoc = glGetUniformLocation(pid, "uScaleFactor"); + osdScreenSizeULoc = glGetUniformLocation(osdShader, "uScreenSize"); + osdPosULoc = glGetUniformLocation(osdShader, "uOSDPos"); + osdSizeULoc = glGetUniformLocation(osdShader, "uOSDSize"); + osdScaleFactorULoc = glGetUniformLocation(osdShader, "uScaleFactor"); const float osdvertices[6*2] = { @@ -818,8 +814,7 @@ void ScreenPanelGL::deinitOpenGL() glDeleteVertexArrays(1, &screenVertexArray); glDeleteBuffers(1, &screenVertexBuffer); - OpenGL::DeleteShaderProgram(screenShaderProgram); - + glDeleteProgram(screenShaderProgram); for (const auto& [key, tex] : osdTextures) { @@ -830,8 +825,7 @@ void ScreenPanelGL::deinitOpenGL() glDeleteVertexArrays(1, &osdVertexArray); glDeleteBuffers(1, &osdVertexBuffer); - OpenGL::DeleteShaderProgram(osdShader); - + glDeleteProgram(osdShader); glContext->DoneCurrent(); @@ -885,7 +879,7 @@ void ScreenPanelGL::drawScreenGL() glViewport(0, 0, w, h); - glUseProgram(screenShaderProgram[2]); + glUseProgram(screenShaderProgram); glUniform2f(screenShaderScreenSizeULoc, w / factor, h / factor); int frontbuf = emuThread->FrontBuffer; @@ -895,7 +889,7 @@ void ScreenPanelGL::drawScreenGL() if (emuThread->NDS->GPU.GetRenderer3D().Accelerated) { // 
hardware-accelerated render - static_cast(emuThread->NDS->GPU.GetRenderer3D()).GetCompositor().BindOutputTexture(frontbuf); + emuThread->NDS->GPU.GetRenderer3D().BindOutputTexture(frontbuf); } else #endif @@ -936,7 +930,7 @@ void ScreenPanelGL::drawScreenGL() u32 y = kOSDMargin; - glUseProgram(osdShader[2]); + glUseProgram(osdShader); glUniform2f(osdScreenSizeULoc, w, h); glUniform1f(osdScaleFactorULoc, factor); diff --git a/src/frontend/qt_sdl/Screen.h b/src/frontend/qt_sdl/Screen.h index c2f7fda1..4ef4feca 100644 --- a/src/frontend/qt_sdl/Screen.h +++ b/src/frontend/qt_sdl/Screen.h @@ -172,7 +172,7 @@ private: GLuint screenVertexBuffer, screenVertexArray; GLuint screenTexture; - GLuint screenShaderProgram[3]; + GLuint screenShaderProgram; GLuint screenShaderTransformULoc, screenShaderScreenSizeULoc; QMutex screenSettingsLock; @@ -181,7 +181,7 @@ private: int lastScreenWidth = -1, lastScreenHeight = -1; - GLuint osdShader[3]; + GLuint osdShader; GLint osdScreenSizeULoc, osdPosULoc, osdSizeULoc; GLfloat osdScaleFactorULoc; GLuint osdVertexArray; diff --git a/src/frontend/qt_sdl/VideoSettingsDialog.cpp b/src/frontend/qt_sdl/VideoSettingsDialog.cpp index 52076ae2..368c5e87 100644 --- a/src/frontend/qt_sdl/VideoSettingsDialog.cpp +++ b/src/frontend/qt_sdl/VideoSettingsDialog.cpp @@ -31,19 +31,19 @@ inline bool UsesGL() { - return (Config::ScreenUseGL != 0) || (Config::_3DRenderer != GPU::renderer3D_Software); + return (Config::ScreenUseGL != 0) || (Config::_3DRenderer != renderer3D_Software); } VideoSettingsDialog* VideoSettingsDialog::currentDlg = nullptr; void VideoSettingsDialog::setEnabled() { - bool softwareRenderer = Config::_3DRenderer == GPU::renderer3D_Software; + bool softwareRenderer = Config::_3DRenderer == renderer3D_Software; ui->cbGLDisplay->setEnabled(softwareRenderer); ui->cbSoftwareThreaded->setEnabled(softwareRenderer); ui->cbxGLResolution->setEnabled(!softwareRenderer); - ui->cbBetterPolygons->setEnabled(Config::_3DRenderer == 
GPU::renderer3D_OpenGL); - ui->cbxComputeHiResCoords->setEnabled(Config::_3DRenderer == GPU::renderer3D_OpenGLCompute); + ui->cbBetterPolygons->setEnabled(Config::_3DRenderer == renderer3D_OpenGL); + ui->cbxComputeHiResCoords->setEnabled(Config::_3DRenderer == renderer3D_OpenGLCompute); } VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::VideoSettingsDialog) @@ -61,9 +61,9 @@ VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui( oldHiresCoordinates = Config::GL_HiresCoordinates; grp3DRenderer = new QButtonGroup(this); - grp3DRenderer->addButton(ui->rb3DSoftware, GPU::renderer3D_Software); - grp3DRenderer->addButton(ui->rb3DOpenGL, GPU::renderer3D_OpenGL); - grp3DRenderer->addButton(ui->rb3DCompute, GPU::renderer3D_OpenGLCompute); + grp3DRenderer->addButton(ui->rb3DSoftware, renderer3D_Software); + grp3DRenderer->addButton(ui->rb3DOpenGL, renderer3D_OpenGL); + grp3DRenderer->addButton(ui->rb3DCompute, renderer3D_OpenGLCompute); #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0) connect(grp3DRenderer, SIGNAL(buttonClicked(int)), this, SLOT(onChange3DRenderer(int))); #else diff --git a/src/frontend/qt_sdl/Window.cpp b/src/frontend/qt_sdl/Window.cpp index a99546bd..536e0219 100644 --- a/src/frontend/qt_sdl/Window.cpp +++ b/src/frontend/qt_sdl/Window.cpp @@ -2048,6 +2048,7 @@ void MainWindow::onUpdateVideoSettings(bool glchange) connect(emuThread, SIGNAL(windowUpdate()), panel, SLOT(repaint())); } + printf("update video settings\n"); videoSettingsDirty = true; if (glchange) diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 01ba52c7..54ade119 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -175,10 +175,6 @@ bool camStarted[2]; //extern int AspectRatiosNum; - - - - static bool FileExtensionInList(const QString& filename, const QStringList& extensions, Qt::CaseSensitivity cs = Qt::CaseInsensitive) { return std::any_of(extensions.cbegin(), 
extensions.cend(), [&](const auto& ext) { @@ -339,10 +335,10 @@ int main(int argc, char** argv) if (!Config::Load()) QMessageBox::critical(NULL, "melonDS", "Unable to write to config.\nPlease check the write permissions of the folder you placed melonDS in."); -#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); } +#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); } SANITIZE(Config::ConsoleType, 0, 1); #ifdef OGLRENDERER_ENABLED - SANITIZE(Config::_3DRenderer, 0, 1); // 0 is the software renderer, 1 is the OpenGL renderer + SANITIZE(Config::_3DRenderer, 0, renderer3D_Max - 1); /* std::clamp's upper bound is inclusive and renderer3D_Max is the count sentinel (one past the last valid renderer ID), so clamp to Max - 1; otherwise a stored value of renderer3D_Max reaches the __builtin_unreachable() default in EmuThread::updateRenderer() */ #else SANITIZE(Config::_3DRenderer, 0, 0); #endif