mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2024-11-14 13:27:41 -07:00
Compute shader renderer (#2041)
* nothing works yet * don't double buffer 3D framebuffers for the GL Renderer looks like leftovers from when 3D+2D composition was done in the frontend * oops * it works! * implement display capture for compute renderer it's actually just all stolen from the regular OpenGL renderer * fix bad indirect call * handle cleanup properly * add hires rendering to the compute shader renderer * fix UB also misc changes to use more unsigned multiplication also fix framebuffer resize * correct edge filling behaviour when AA is disabled * fix full color textures * fix edge marking (polygon id is 6-bit not 5) also make the code a bit nicer * take all edge cases into account for XMin/XMax calculation * use hires coordinate again * stop using fixed size buffers based on scale factor in shaders this makes shader compile times tolerable on Wintel - beginning of the shader cache - increase size of tile idx in workdesc to 20 bits * apparently & is not defined on bvec4 why does this even compile on Intel and Nvidia? * put the texture cache into its own file * add compute shader renderer properly to the GUI also add option to toggle using high resolution vertex coordinates * unbind sampler object in compute shader renderer * fix GetRangedBitMask for 64 bit aligned 64 bits pretty embarrassing * convert NonStupidBitfield.h back to LF only new lines * actually adapt to latest changes * fix stupid merge * actually make compute shader renderer work with newest changes * show progress on shader compilation * remove merge leftover
This commit is contained in:
parent
c85a2103bb
commit
043244a56d
@ -35,6 +35,8 @@ add_library(core STATIC
|
||||
GPU2D_Soft.cpp
|
||||
GPU3D.cpp
|
||||
GPU3D_Soft.cpp
|
||||
GPU3D_Texcache.cpp
|
||||
GPU3D_Texcache.h
|
||||
melonDLDI.h
|
||||
NDS.cpp
|
||||
NDSCart.cpp
|
||||
@ -78,6 +80,9 @@ if (ENABLE_OGLRENDERER)
|
||||
GPU_OpenGL.cpp
|
||||
GPU_OpenGL_shaders.h
|
||||
GPU3D_OpenGL.cpp
|
||||
GPU3D_Compute.cpp
|
||||
GPU3D_TexcacheOpenGL.cpp
|
||||
GPU3D_TexcacheOpenGL.h
|
||||
GPU3D_OpenGL_shaders.h
|
||||
OpenGLSupport.cpp)
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "DSi.h"
|
||||
#include "DMA.h"
|
||||
#include "GPU.h"
|
||||
#include "GPU3D.h"
|
||||
#include "DMA_Timings.h"
|
||||
#include "Platform.h"
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "DSi_NDMA.h"
|
||||
#include "GPU.h"
|
||||
#include "DSi_AES.h"
|
||||
#include "GPU3D.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "ARMJIT.h"
|
||||
|
||||
#include "GPU2D_Soft.h"
|
||||
#include "GPU3D_Soft.h"
|
||||
#include "GPU3D.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <string.h>
|
||||
#include "NDS.h"
|
||||
#include "GPU.h"
|
||||
#include "GPU3D.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
#include "GPU2D_Soft.h"
|
||||
#include "GPU.h"
|
||||
#include "GPU3D_OpenGL.h"
|
||||
#include "GPU3D.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "FIFO.h"
|
||||
#include "GPU3D_Soft.h"
|
||||
#include "Platform.h"
|
||||
#include "GPU3D.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
@ -349,7 +349,14 @@ public:
|
||||
virtual void RestartFrame(GPU& gpu) {};
|
||||
virtual u32* GetLine(int line) = 0;
|
||||
virtual void Blit(const GPU& gpu) {};
|
||||
|
||||
virtual void SetupAccelFrame() {}
|
||||
virtual void PrepareCaptureFrame() {}
|
||||
virtual void BindOutputTexture(int buffer) {}
|
||||
|
||||
virtual bool NeedsShaderCompile() { return false; }
|
||||
virtual void ShaderCompileStep(int& current, int& count) {}
|
||||
|
||||
protected:
|
||||
Renderer3D(bool Accelerated);
|
||||
};
|
||||
|
1136
src/GPU3D_Compute.cpp
Normal file
1136
src/GPU3D_Compute.cpp
Normal file
File diff suppressed because it is too large
Load Diff
242
src/GPU3D_Compute.h
Normal file
242
src/GPU3D_Compute.h
Normal file
@ -0,0 +1,242 @@
|
||||
/*
|
||||
Copyright 2016-2022 melonDS team
|
||||
|
||||
This file is part of melonDS.
|
||||
|
||||
melonDS is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation, either version 3 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#ifndef GPU3D_COMPUTE
|
||||
#define GPU3D_COMPUTE
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include "GPU3D.h"
|
||||
|
||||
#include "OpenGLSupport.h"
|
||||
#include "GPU_OpenGL.h"
|
||||
|
||||
#include "GPU3D_TexcacheOpenGL.h"
|
||||
|
||||
#include "NonStupidBitfield.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
class ComputeRenderer : public Renderer3D
|
||||
{
|
||||
public:
|
||||
static std::unique_ptr<ComputeRenderer> New();
|
||||
~ComputeRenderer() override;
|
||||
|
||||
void Reset(GPU& gpu) override;
|
||||
|
||||
void SetRenderSettings(int scale, bool highResolutionCoordinates);
|
||||
|
||||
void VCount144(GPU& gpu) override;
|
||||
|
||||
void RenderFrame(GPU& gpu) override;
|
||||
void RestartFrame(GPU& gpu) override;
|
||||
u32* GetLine(int line) override;
|
||||
|
||||
void SetupAccelFrame() override;
|
||||
void PrepareCaptureFrame() override;
|
||||
|
||||
void BindOutputTexture(int buffer) override;
|
||||
|
||||
void Blit(const GPU& gpu) override;
|
||||
void Stop(const GPU& gpu) override;
|
||||
|
||||
bool NeedsShaderCompile() { return ShaderStepIdx != 33; }
|
||||
void ShaderCompileStep(int& current, int& count) override;
|
||||
private:
|
||||
ComputeRenderer(GLCompositor&& compositor);
|
||||
|
||||
GLuint ShaderInterpXSpans[2];
|
||||
GLuint ShaderBinCombined;
|
||||
GLuint ShaderDepthBlend[2];
|
||||
GLuint ShaderRasteriseNoTexture[2];
|
||||
GLuint ShaderRasteriseNoTextureToon[2];
|
||||
GLuint ShaderRasteriseNoTextureHighlight[2];
|
||||
GLuint ShaderRasteriseUseTextureDecal[2];
|
||||
GLuint ShaderRasteriseUseTextureModulate[2];
|
||||
GLuint ShaderRasteriseUseTextureToon[2];
|
||||
GLuint ShaderRasteriseUseTextureHighlight[2];
|
||||
GLuint ShaderRasteriseShadowMask[2];
|
||||
GLuint ShaderClearCoarseBinMask;
|
||||
GLuint ShaderClearIndirectWorkCount;
|
||||
GLuint ShaderCalculateWorkListOffset;
|
||||
GLuint ShaderSortWork;
|
||||
GLuint ShaderFinalPass[8];
|
||||
|
||||
GLuint YSpanIndicesTextureMemory;
|
||||
GLuint YSpanIndicesTexture;
|
||||
GLuint YSpanSetupMemory;
|
||||
GLuint XSpanSetupMemory;
|
||||
GLuint BinResultMemory;
|
||||
GLuint RenderPolygonMemory;
|
||||
GLuint WorkDescMemory;
|
||||
|
||||
enum
|
||||
{
|
||||
tilememoryLayer_Color,
|
||||
tilememoryLayer_Depth,
|
||||
tilememoryLayer_Attr,
|
||||
tilememoryLayer_Num,
|
||||
};
|
||||
|
||||
GLuint TileMemory[tilememoryLayer_Num];
|
||||
GLuint FinalTileMemory;
|
||||
|
||||
u32 DummyLine[256] = {};
|
||||
|
||||
struct SpanSetupY
|
||||
{
|
||||
// Attributes
|
||||
s32 Z0, Z1, W0, W1;
|
||||
s32 ColorR0, ColorG0, ColorB0;
|
||||
s32 ColorR1, ColorG1, ColorB1;
|
||||
s32 TexcoordU0, TexcoordV0;
|
||||
s32 TexcoordU1, TexcoordV1;
|
||||
|
||||
// Interpolator
|
||||
s32 I0, I1;
|
||||
s32 Linear;
|
||||
s32 IRecip;
|
||||
s32 W0n, W0d, W1d;
|
||||
|
||||
// Slope
|
||||
s32 Increment;
|
||||
|
||||
s32 X0, X1, Y0, Y1;
|
||||
s32 XMin, XMax;
|
||||
s32 DxInitial;
|
||||
|
||||
s32 XCovIncr;
|
||||
u32 IsDummy;
|
||||
};
|
||||
struct SpanSetupX
|
||||
{
|
||||
s32 X0, X1;
|
||||
|
||||
s32 EdgeLenL, EdgeLenR, EdgeCovL, EdgeCovR;
|
||||
|
||||
s32 XRecip;
|
||||
|
||||
u32 Flags;
|
||||
|
||||
s32 Z0, Z1, W0, W1;
|
||||
s32 ColorR0, ColorG0, ColorB0;
|
||||
s32 ColorR1, ColorG1, ColorB1;
|
||||
s32 TexcoordU0, TexcoordV0;
|
||||
s32 TexcoordU1, TexcoordV1;
|
||||
|
||||
s32 CovLInitial, CovRInitial;
|
||||
};
|
||||
struct SetupIndices
|
||||
{
|
||||
u16 PolyIdx, SpanIdxL, SpanIdxR, Y;
|
||||
};
|
||||
struct RenderPolygon
|
||||
{
|
||||
u32 FirstXSpan;
|
||||
s32 YTop, YBot;
|
||||
|
||||
s32 XMin, XMax;
|
||||
s32 XMinY, XMaxY;
|
||||
|
||||
u32 Variant;
|
||||
u32 Attr;
|
||||
|
||||
float TextureLayer;
|
||||
};
|
||||
|
||||
static constexpr int TileSize = 8;
|
||||
static constexpr int CoarseTileCountX = 8;
|
||||
static constexpr int CoarseTileCountY = 4;
|
||||
static constexpr int CoarseTileW = CoarseTileCountX * TileSize;
|
||||
static constexpr int CoarseTileH = CoarseTileCountY * TileSize;
|
||||
|
||||
static constexpr int BinStride = 2048/32;
|
||||
static constexpr int CoarseBinStride = BinStride/32;
|
||||
|
||||
static constexpr int MaxVariants = 256;
|
||||
|
||||
static constexpr int UniformIdxCurVariant = 0;
|
||||
static constexpr int UniformIdxTextureSize = 1;
|
||||
|
||||
static constexpr int MaxFullscreenLayers = 16;
|
||||
|
||||
struct BinResultHeader
|
||||
{
|
||||
u32 VariantWorkCount[MaxVariants*4];
|
||||
u32 SortedWorkOffset[MaxVariants];
|
||||
|
||||
u32 SortWorkWorkCount[4];
|
||||
};
|
||||
|
||||
static const int MaxYSpanSetups = 6144*2;
|
||||
std::vector<SetupIndices> YSpanIndices;
|
||||
SpanSetupY YSpanSetups[MaxYSpanSetups];
|
||||
RenderPolygon RenderPolygons[2048];
|
||||
|
||||
TexcacheOpenGL Texcache;
|
||||
|
||||
struct MetaUniform
|
||||
{
|
||||
u32 NumPolygons;
|
||||
u32 NumVariants;
|
||||
|
||||
u32 AlphaRef;
|
||||
u32 DispCnt;
|
||||
|
||||
u32 ToonTable[4*34];
|
||||
|
||||
u32 ClearColor, ClearDepth, ClearAttr;
|
||||
|
||||
u32 FogOffset, FogShift, FogColor;
|
||||
};
|
||||
GLuint MetaUniformMemory;
|
||||
|
||||
GLuint Samplers[9];
|
||||
|
||||
GLuint Framebuffer = 0;
|
||||
GLuint LowResFramebuffer;
|
||||
GLuint PixelBuffer;
|
||||
|
||||
u32 FramebufferCPU[256*192];
|
||||
|
||||
int ScreenWidth, ScreenHeight;
|
||||
int TilesPerLine, TileLines;
|
||||
int ScaleFactor = -1;
|
||||
int MaxWorkTiles;
|
||||
bool HiresCoordinates;
|
||||
|
||||
GLCompositor CurGLCompositor;
|
||||
|
||||
int ShaderStepIdx = 0;
|
||||
|
||||
void DeleteShaders();
|
||||
|
||||
void SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to);
|
||||
void SetupYSpan(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int from, int to, int side, s32 positions[10][2]);
|
||||
void SetupYSpanDummy(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int vertex, int side, s32 positions[10][2]);
|
||||
|
||||
bool CompileShader(GLuint& shader, const std::string& source, const std::initializer_list<const char*>& defines);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
1665
src/GPU3D_Compute_shaders.h
Normal file
1665
src/GPU3D_Compute_shaders.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -28,46 +28,32 @@
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
|
||||
bool GLRenderer::BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs)
|
||||
{
|
||||
char shadername[32];
|
||||
snprintf(shadername, sizeof(shadername), "RenderShader%02X", flags);
|
||||
|
||||
int headerlen = strlen(kShaderHeader);
|
||||
|
||||
int vslen = strlen(vs);
|
||||
int vsclen = strlen(kRenderVSCommon);
|
||||
char* vsbuf = new char[headerlen + vsclen + vslen + 1];
|
||||
strcpy(&vsbuf[0], kShaderHeader);
|
||||
strcpy(&vsbuf[headerlen], kRenderVSCommon);
|
||||
strcpy(&vsbuf[headerlen + vsclen], vs);
|
||||
std::string vsbuf;
|
||||
vsbuf += kShaderHeader;
|
||||
vsbuf += kRenderVSCommon;
|
||||
vsbuf += vs;
|
||||
|
||||
int fslen = strlen(fs);
|
||||
int fsclen = strlen(kRenderFSCommon);
|
||||
char* fsbuf = new char[headerlen + fsclen + fslen + 1];
|
||||
strcpy(&fsbuf[0], kShaderHeader);
|
||||
strcpy(&fsbuf[headerlen], kRenderFSCommon);
|
||||
strcpy(&fsbuf[headerlen + fsclen], fs);
|
||||
std::string fsbuf;
|
||||
fsbuf += kShaderHeader;
|
||||
fsbuf += kRenderFSCommon;
|
||||
fsbuf += fs;
|
||||
|
||||
bool ret = OpenGL::BuildShaderProgram(vsbuf, fsbuf, RenderShader[flags], shadername);
|
||||
|
||||
delete[] vsbuf;
|
||||
delete[] fsbuf;
|
||||
GLuint prog;
|
||||
bool ret = OpenGL::CompileVertexFragmentProgram(prog,
|
||||
vsbuf, fsbuf,
|
||||
shadername,
|
||||
{{"vPosition", 0}, {"vColor", 1}, {"vTexcoord", 2}, {"vPolygonAttr", 3}},
|
||||
{{"oColor", 0}, {"oAttr", 1}});
|
||||
|
||||
if (!ret) return false;
|
||||
|
||||
GLuint prog = RenderShader[flags][2];
|
||||
|
||||
glBindAttribLocation(prog, 0, "vPosition");
|
||||
glBindAttribLocation(prog, 1, "vColor");
|
||||
glBindAttribLocation(prog, 2, "vTexcoord");
|
||||
glBindAttribLocation(prog, 3, "vPolygonAttr");
|
||||
glBindFragDataLocation(prog, 0, "oColor");
|
||||
glBindFragDataLocation(prog, 1, "oAttr");
|
||||
|
||||
if (!OpenGL::LinkShaderProgram(RenderShader[flags]))
|
||||
return false;
|
||||
|
||||
GLint uni_id = glGetUniformBlockIndex(prog, "uConfig");
|
||||
glUniformBlockBinding(prog, uni_id, 0);
|
||||
|
||||
@ -78,13 +64,15 @@ bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
|
||||
uni_id = glGetUniformLocation(prog, "TexPalMem");
|
||||
glUniform1i(uni_id, 1);
|
||||
|
||||
RenderShader[flags] = prog;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void GLRenderer::UseRenderShader(u32 flags)
|
||||
{
|
||||
if (CurShaderID == flags) return;
|
||||
glUseProgram(RenderShader[flags][2]);
|
||||
glUseProgram(RenderShader[flags]);
|
||||
CurShaderID = flags;
|
||||
}
|
||||
|
||||
@ -125,21 +113,17 @@ std::unique_ptr<GLRenderer> GLRenderer::New() noexcept
|
||||
glDepthRange(0, 1);
|
||||
glClearDepth(1.0);
|
||||
|
||||
|
||||
if (!OpenGL::BuildShaderProgram(kClearVS, kClearFS, result->ClearShaderPlain, "ClearShader"))
|
||||
if (!OpenGL::CompileVertexFragmentProgram(result->ClearShaderPlain,
|
||||
kClearVS, kClearFS,
|
||||
"ClearShader",
|
||||
{{"vPosition", 0}},
|
||||
{{"oColor", 0}, {"oAttr", 1}}))
|
||||
return nullptr;
|
||||
|
||||
glBindAttribLocation(result->ClearShaderPlain[2], 0, "vPosition");
|
||||
glBindFragDataLocation(result->ClearShaderPlain[2], 0, "oColor");
|
||||
glBindFragDataLocation(result->ClearShaderPlain[2], 1, "oAttr");
|
||||
|
||||
if (!OpenGL::LinkShaderProgram(result->ClearShaderPlain))
|
||||
return nullptr;
|
||||
|
||||
result->ClearUniformLoc[0] = glGetUniformLocation(result->ClearShaderPlain[2], "uColor");
|
||||
result->ClearUniformLoc[1] = glGetUniformLocation(result->ClearShaderPlain[2], "uDepth");
|
||||
result->ClearUniformLoc[2] = glGetUniformLocation(result->ClearShaderPlain[2], "uOpaquePolyID");
|
||||
result->ClearUniformLoc[3] = glGetUniformLocation(result->ClearShaderPlain[2], "uFogFlag");
|
||||
result->ClearUniformLoc[0] = glGetUniformLocation(result->ClearShaderPlain, "uColor");
|
||||
result->ClearUniformLoc[1] = glGetUniformLocation(result->ClearShaderPlain, "uDepth");
|
||||
result->ClearUniformLoc[2] = glGetUniformLocation(result->ClearShaderPlain, "uOpaquePolyID");
|
||||
result->ClearUniformLoc[3] = glGetUniformLocation(result->ClearShaderPlain, "uFogFlag");
|
||||
|
||||
memset(result->RenderShader, 0, sizeof(RenderShader));
|
||||
|
||||
@ -167,42 +151,35 @@ std::unique_ptr<GLRenderer> GLRenderer::New() noexcept
|
||||
if (!result->BuildRenderShader(RenderFlag_ShadowMask | RenderFlag_WBuffer, kRenderVS_W, kRenderFS_WSM))
|
||||
return nullptr;
|
||||
|
||||
if (!OpenGL::BuildShaderProgram(kFinalPassVS, kFinalPassEdgeFS, result->FinalPassEdgeShader, "FinalPassEdgeShader"))
|
||||
if (!OpenGL::CompileVertexFragmentProgram(result->FinalPassEdgeShader,
|
||||
kFinalPassVS, kFinalPassEdgeFS,
|
||||
"FinalPassEdgeShader",
|
||||
{{"vPosition", 0}},
|
||||
{{"oColor", 0}}))
|
||||
return nullptr;
|
||||
if (!OpenGL::CompileVertexFragmentProgram(result->FinalPassFogShader,
|
||||
kFinalPassVS, kFinalPassFogFS,
|
||||
"FinalPassFogShader",
|
||||
{{"vPosition", 0}},
|
||||
{{"oColor", 0}}))
|
||||
return nullptr;
|
||||
|
||||
if (!OpenGL::BuildShaderProgram(kFinalPassVS, kFinalPassFogFS, result->FinalPassFogShader, "FinalPassFogShader"))
|
||||
return nullptr;
|
||||
GLuint uni_id = glGetUniformBlockIndex(result->FinalPassEdgeShader, "uConfig");
|
||||
glUniformBlockBinding(result->FinalPassEdgeShader, uni_id, 0);
|
||||
|
||||
glBindAttribLocation(result->FinalPassEdgeShader[2], 0, "vPosition");
|
||||
glBindFragDataLocation(result->FinalPassEdgeShader[2], 0, "oColor");
|
||||
|
||||
if (!OpenGL::LinkShaderProgram(result->FinalPassEdgeShader))
|
||||
return nullptr;
|
||||
|
||||
GLint uni_id = glGetUniformBlockIndex(result->FinalPassEdgeShader[2], "uConfig");
|
||||
glUniformBlockBinding(result->FinalPassEdgeShader[2], uni_id, 0);
|
||||
|
||||
glUseProgram(result->FinalPassEdgeShader[2]);
|
||||
|
||||
uni_id = glGetUniformLocation(result->FinalPassEdgeShader[2], "DepthBuffer");
|
||||
glUseProgram(result->FinalPassEdgeShader);
|
||||
uni_id = glGetUniformLocation(result->FinalPassEdgeShader, "DepthBuffer");
|
||||
glUniform1i(uni_id, 0);
|
||||
uni_id = glGetUniformLocation(result->FinalPassEdgeShader[2], "AttrBuffer");
|
||||
uni_id = glGetUniformLocation(result->FinalPassEdgeShader, "AttrBuffer");
|
||||
glUniform1i(uni_id, 1);
|
||||
|
||||
glBindAttribLocation(result->FinalPassFogShader[2], 0, "vPosition");
|
||||
glBindFragDataLocation(result->FinalPassFogShader[2], 0, "oColor");
|
||||
uni_id = glGetUniformBlockIndex(result->FinalPassFogShader, "uConfig");
|
||||
glUniformBlockBinding(result->FinalPassFogShader, uni_id, 0);
|
||||
|
||||
if (!OpenGL::LinkShaderProgram(result->FinalPassFogShader))
|
||||
return nullptr;
|
||||
|
||||
uni_id = glGetUniformBlockIndex(result->FinalPassFogShader[2], "uConfig");
|
||||
glUniformBlockBinding(result->FinalPassFogShader[2], uni_id, 0);
|
||||
|
||||
glUseProgram(result->FinalPassFogShader[2]);
|
||||
|
||||
uni_id = glGetUniformLocation(result->FinalPassFogShader[2], "DepthBuffer");
|
||||
glUseProgram(result->FinalPassFogShader);
|
||||
uni_id = glGetUniformLocation(result->FinalPassFogShader, "DepthBuffer");
|
||||
glUniform1i(uni_id, 0);
|
||||
uni_id = glGetUniformLocation(result->FinalPassFogShader[2], "AttrBuffer");
|
||||
uni_id = glGetUniformLocation(result->FinalPassFogShader, "AttrBuffer");
|
||||
glUniform1i(uni_id, 1);
|
||||
|
||||
|
||||
@ -255,29 +232,26 @@ std::unique_ptr<GLRenderer> GLRenderer::New() noexcept
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, result->IndexBufferID);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), nullptr, GL_DYNAMIC_DRAW);
|
||||
|
||||
glGenFramebuffers(4, &result->FramebufferID[0]);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, result->FramebufferID[0]);
|
||||
|
||||
glGenTextures(8, &result->FramebufferTex[0]);
|
||||
result->FrontBuffer = 0;
|
||||
glGenFramebuffers(1, &result->MainFramebuffer);
|
||||
|
||||
// color buffers
|
||||
SetupDefaultTexParams(result->FramebufferTex[0]);
|
||||
SetupDefaultTexParams(result->FramebufferTex[1]);
|
||||
glGenTextures(1, &result->ColorBufferTex);
|
||||
SetupDefaultTexParams(result->ColorBufferTex);
|
||||
|
||||
// depth/stencil buffer
|
||||
SetupDefaultTexParams(result->FramebufferTex[4]);
|
||||
SetupDefaultTexParams(result->FramebufferTex[6]);
|
||||
glGenTextures(1, &result->DepthBufferTex);
|
||||
SetupDefaultTexParams(result->DepthBufferTex);
|
||||
|
||||
// attribute buffer
|
||||
// R: opaque polyID (for edgemarking)
|
||||
// G: edge flag
|
||||
// B: fog flag
|
||||
SetupDefaultTexParams(result->FramebufferTex[5]);
|
||||
SetupDefaultTexParams(result->FramebufferTex[7]);
|
||||
glGenTextures(1, &result->AttrBufferTex);
|
||||
SetupDefaultTexParams(result->AttrBufferTex);
|
||||
|
||||
// downscale framebuffer for display capture (always 256x192)
|
||||
SetupDefaultTexParams(result->FramebufferTex[3]);
|
||||
glGenTextures(1, &result->DownScaleBufferTex);
|
||||
SetupDefaultTexParams(result->DownScaleBufferTex);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, 192, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
@ -315,8 +289,12 @@ GLRenderer::~GLRenderer()
|
||||
glDeleteTextures(1, &TexMemID);
|
||||
glDeleteTextures(1, &TexPalMemID);
|
||||
|
||||
glDeleteFramebuffers(4, &FramebufferID[0]);
|
||||
glDeleteTextures(8, &FramebufferTex[0]);
|
||||
glDeleteFramebuffers(1, &MainFramebuffer);
|
||||
glDeleteFramebuffers(1, &DownscaleFramebuffer);
|
||||
glDeleteTextures(1, &ColorBufferTex);
|
||||
glDeleteTextures(1, &DepthBufferTex);
|
||||
glDeleteTextures(1, &AttrBufferTex);
|
||||
glDeleteTextures(1, &DownScaleBufferTex);
|
||||
|
||||
glDeleteVertexArrays(1, &VertexArrayID);
|
||||
glDeleteBuffers(1, &VertexBufferID);
|
||||
@ -327,8 +305,8 @@ GLRenderer::~GLRenderer()
|
||||
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
if (!RenderShader[i][2]) continue;
|
||||
OpenGL::DeleteShaderProgram(RenderShader[i]);
|
||||
if (!RenderShader[i]) continue;
|
||||
glDeleteProgram(RenderShader[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -361,40 +339,25 @@ void GLRenderer::SetRenderSettings(bool betterpolygons, int scale) noexcept
|
||||
ScreenW = 256 * scale;
|
||||
ScreenH = 192 * scale;
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[0]);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[1]);
|
||||
glBindTexture(GL_TEXTURE_2D, ColorBufferTex);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[4]);
|
||||
glBindTexture(GL_TEXTURE_2D, DepthBufferTex);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, ScreenW, ScreenH, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[5]);
|
||||
glBindTexture(GL_TEXTURE_2D, AttrBufferTex);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, ScreenW, ScreenH, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[6]);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, ScreenW, ScreenH, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[7]);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, ScreenW, ScreenH, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[3]);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[3], 0);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, DownscaleFramebuffer);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, DownScaleBufferTex, 0);
|
||||
|
||||
GLenum fbassign[2] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1};
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[0], 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, FramebufferTex[4], 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FramebufferTex[5], 0);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, MainFramebuffer);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, ColorBufferTex, 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, DepthBufferTex, 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, AttrBufferTex, 0);
|
||||
glDrawBuffers(2, fbassign);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[1]);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[1], 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, FramebufferTex[6], 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FramebufferTex[7], 0);
|
||||
glDrawBuffers(2, fbassign);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, NULL, GL_DYNAMIC_READ);
|
||||
|
||||
@ -1103,9 +1066,9 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h)
|
||||
glStencilMask(0);
|
||||
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer ? 6 : 4]);
|
||||
glBindTexture(GL_TEXTURE_2D, DepthBufferTex);
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer ? 7 : 5]);
|
||||
glBindTexture(GL_TEXTURE_2D, AttrBufferTex);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID);
|
||||
glBindVertexArray(ClearVertexArrayID);
|
||||
@ -1115,7 +1078,7 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h)
|
||||
// edge marking
|
||||
// TODO: depth/polyid values at screen edges
|
||||
|
||||
glUseProgram(FinalPassEdgeShader[2]);
|
||||
glUseProgram(FinalPassEdgeShader);
|
||||
|
||||
glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE);
|
||||
|
||||
@ -1126,7 +1089,7 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h)
|
||||
{
|
||||
// fog
|
||||
|
||||
glUseProgram(FinalPassFogShader[2]);
|
||||
glUseProgram(FinalPassFogShader);
|
||||
|
||||
if (gpu3d.RenderDispCnt & (1<<6))
|
||||
glBlendFuncSeparate(GL_ZERO, GL_ONE, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA);
|
||||
@ -1154,7 +1117,7 @@ void GLRenderer::RenderFrame(GPU& gpu)
|
||||
CurShaderID = -1;
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferID[FrontBuffer]);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, MainFramebuffer);
|
||||
|
||||
ShaderConfig.uScreenSize[0] = ScreenW;
|
||||
ShaderConfig.uScreenSize[1] = ScreenH;
|
||||
@ -1260,7 +1223,7 @@ void GLRenderer::RenderFrame(GPU& gpu)
|
||||
// TODO: check whether 'clear polygon ID' affects translucent polyID
|
||||
// (for example when alpha is 1..30)
|
||||
{
|
||||
glUseProgram(ClearShaderPlain[2]);
|
||||
glUseProgram(ClearShaderPlain);
|
||||
glDepthFunc(GL_ALWAYS);
|
||||
|
||||
u32 r = gpu.GPU3D.RenderClearAttr1 & 0x1F;
|
||||
@ -1320,8 +1283,6 @@ void GLRenderer::RenderFrame(GPU& gpu)
|
||||
|
||||
RenderSceneChunk(gpu.GPU3D, 0, 192);
|
||||
}
|
||||
|
||||
FrontBuffer = FrontBuffer ? 0 : 1;
|
||||
}
|
||||
|
||||
void GLRenderer::Stop(const GPU& gpu)
|
||||
@ -1331,16 +1292,14 @@ void GLRenderer::Stop(const GPU& gpu)
|
||||
|
||||
void GLRenderer::PrepareCaptureFrame()
|
||||
{
|
||||
// TODO: make sure this picks the right buffer when doing antialiasing
|
||||
int original_fb = FrontBuffer^1;
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[original_fb]);
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, MainFramebuffer);
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT0);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferID[3]);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, DownscaleFramebuffer);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT0);
|
||||
glBlitFramebuffer(0, 0, ScreenW, ScreenH, 0, 0, 256, 192, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[3]);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, DownscaleFramebuffer);
|
||||
glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
|
||||
}
|
||||
|
||||
@ -1349,12 +1308,18 @@ void GLRenderer::Blit(const GPU& gpu)
|
||||
CurGLCompositor.RenderFrame(gpu, *this);
|
||||
}
|
||||
|
||||
void GLRenderer::BindOutputTexture(int buffer)
|
||||
{
|
||||
CurGLCompositor.BindOutputTexture(buffer);
|
||||
}
|
||||
|
||||
u32* GLRenderer::GetLine(int line)
|
||||
{
|
||||
int stride = 256;
|
||||
|
||||
if (line == 0)
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
|
||||
u8* data = (u8*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||
if (data) memcpy(&Framebuffer[stride*0], data, 4*stride*192);
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
@ -1374,7 +1339,7 @@ u32* GLRenderer::GetLine(int line)
|
||||
|
||||
void GLRenderer::SetupAccelFrame()
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer]);
|
||||
glBindTexture(GL_TEXTURE_2D, ColorBufferTex);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -44,12 +44,11 @@ public:
|
||||
void Stop(const GPU& gpu) override;
|
||||
u32* GetLine(int line) override;
|
||||
|
||||
void SetupAccelFrame();
|
||||
void SetupAccelFrame() override;
|
||||
void PrepareCaptureFrame() override;
|
||||
void Blit(const GPU& gpu) override;
|
||||
|
||||
[[nodiscard]] const GLCompositor& GetCompositor() const noexcept { return CurGLCompositor; }
|
||||
GLCompositor& GetCompositor() noexcept { return CurGLCompositor; }
|
||||
void BindOutputTexture(int buffer) override;
|
||||
|
||||
static std::unique_ptr<GLRenderer> New() noexcept;
|
||||
private:
|
||||
@ -77,7 +76,7 @@ private:
|
||||
GLCompositor CurGLCompositor;
|
||||
RendererPolygon PolygonList[2048] {};
|
||||
|
||||
bool BuildRenderShader(u32 flags, const char* vs, const char* fs);
|
||||
bool BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs);
|
||||
void UseRenderShader(u32 flags);
|
||||
void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
|
||||
u32* SetupVertex(const Polygon* poly, int vid, const Vertex* vtx, u32 vtxattr, u32* vptr) const;
|
||||
@ -96,13 +95,13 @@ private:
|
||||
};
|
||||
|
||||
|
||||
GLuint ClearShaderPlain[3] {};
|
||||
GLuint ClearShaderPlain {};
|
||||
|
||||
GLuint RenderShader[16][3] {};
|
||||
GLuint RenderShader[16] {};
|
||||
GLuint CurShaderID = -1;
|
||||
|
||||
GLuint FinalPassEdgeShader[3] {};
|
||||
GLuint FinalPassFogShader[3] {};
|
||||
GLuint FinalPassEdgeShader {};
|
||||
GLuint FinalPassFogShader {};
|
||||
|
||||
// std140 compliant structure
|
||||
struct
|
||||
@ -155,12 +154,12 @@ private:
|
||||
bool BetterPolygons {};
|
||||
int ScreenW {}, ScreenH {};
|
||||
|
||||
GLuint FramebufferTex[8] {};
|
||||
int FrontBuffer {};
|
||||
GLuint FramebufferID[4] {}, PixelbufferID {};
|
||||
GLuint ColorBufferTex {}, DepthBufferTex {}, AttrBufferTex {};
|
||||
GLuint DownScaleBufferTex {};
|
||||
GLuint PixelbufferID {};
|
||||
|
||||
GLuint MainFramebuffer {}, DownscaleFramebuffer {};
|
||||
u32 Framebuffer[256*192] {};
|
||||
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
@ -95,8 +95,8 @@ void SoftRenderer::EnableRenderThread()
|
||||
}
|
||||
}
|
||||
|
||||
SoftRenderer::SoftRenderer(bool threaded) noexcept
|
||||
: Renderer3D(false), Threaded(threaded)
|
||||
SoftRenderer::SoftRenderer() noexcept
|
||||
: Renderer3D(false)
|
||||
{
|
||||
Sema_RenderStart = Platform::Semaphore_Create();
|
||||
Sema_RenderDone = Platform::Semaphore_Create();
|
||||
|
@ -29,7 +29,7 @@ namespace melonDS
|
||||
class SoftRenderer : public Renderer3D
|
||||
{
|
||||
public:
|
||||
SoftRenderer(bool threaded = false) noexcept;
|
||||
SoftRenderer() noexcept;
|
||||
~SoftRenderer() override;
|
||||
void Reset(GPU& gpu) override;
|
||||
|
||||
@ -504,7 +504,7 @@ private:
|
||||
|
||||
// threading
|
||||
|
||||
bool Threaded;
|
||||
bool Threaded = false;
|
||||
Platform::Thread* RenderThread;
|
||||
std::atomic_bool RenderThreadRunning;
|
||||
std::atomic_bool RenderThreadRendering;
|
||||
|
269
src/GPU3D_Texcache.cpp
Normal file
269
src/GPU3D_Texcache.cpp
Normal file
@ -0,0 +1,269 @@
|
||||
#include "GPU3D_Texcache.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
// Averages two 15-bit colours channel by channel.
// Each channel is kept in its original bit position (red 0-4, green 5-9,
// blue 10-14); bit 15 of both inputs is ignored and is clear in the result.
inline u16 ColorAvg(u16 color0, u16 color1)
{
    constexpr u32 redMask   = 0x001F;
    constexpr u32 greenMask = 0x03E0;
    constexpr u32 blueMask  = 0x7C00;

    const u32 red   =  ((color0 & redMask)   + (color1 & redMask))   >> 1;
    // re-mask green/blue so the halved sum cannot leak into the channel below
    const u32 green = (((color0 & greenMask) + (color1 & greenMask)) >> 1) & greenMask;
    const u32 blue  = (((color0 & blueMask)  + (color1 & blueMask))  >> 1) & blueMask;

    return red | green | blue;
}
|
||||
|
||||
// Blends two 15-bit colours with weights 5/8 (color0) and 3/8 (color1),
// channel by channel. Bit 15 of the inputs is ignored.
inline u16 Color5of3(u16 color0, u16 color1)
{
    constexpr u32 redMask   = 0x001F;
    constexpr u32 greenMask = 0x03E0;
    constexpr u32 blueMask  = 0x7C00;

    const u32 red   =  ((color0 & redMask)   * 5 + (color1 & redMask)   * 3) >> 3;
    // re-mask green/blue so fractional bits do not spill into the channel below
    const u32 green = (((color0 & greenMask) * 5 + (color1 & greenMask) * 3) >> 3) & greenMask;
    const u32 blue  = (((color0 & blueMask)  * 5 + (color1 & blueMask)  * 3) >> 3) & blueMask;

    return red | green | blue;
}
|
||||
|
||||
// Blends two 15-bit colours with weights 3/8 (color0) and 5/8 (color1),
// channel by channel. Bit 15 of the inputs is ignored.
inline u16 Color3of5(u16 color0, u16 color1)
{
    constexpr u32 redMask   = 0x001F;
    constexpr u32 greenMask = 0x03E0;
    constexpr u32 blueMask  = 0x7C00;

    const u32 red   =  ((color0 & redMask)   * 3 + (color1 & redMask)   * 5) >> 3;
    // re-mask green/blue so fractional bits do not spill into the channel below
    const u32 green = (((color0 & greenMask) * 3 + (color1 & greenMask) * 5) >> 3) & greenMask;
    const u32 blue  = (((color0 & blueMask)  * 3 + (color1 & blueMask)  * 5) >> 3) & blueMask;

    return red | green | blue;
}
|
||||
|
||||
// Expands a 15-bit colour to 8 bits per channel by shifting each 5-bit
// channel into the top of its byte: red -> byte 0, green -> byte 1,
// blue -> byte 2. The low three bits of each byte and byte 3 stay zero.
inline u32 ConvertRGB5ToRGB8(u16 val)
{
    const u32 wide = val;

    const u32 red   = (wide & 0x001F) << 3;
    const u32 green = (wide & 0x03E0) << 6;
    const u32 blue  = (wide & 0x7C00) << 9;

    return red | green | blue;
}
|
||||
// Expands a 15-bit colour to 8 bits per channel with red and blue swapped
// relative to ConvertRGB5ToRGB8: blue -> byte 0, green -> byte 1,
// red -> byte 2. The low three bits of each byte and byte 3 stay zero.
//
// The previous shifts (<< 9 for red, << 3 for blue) placed red on bits 9-13,
// overlapping green, and blue on bits 13-17, so no channel ended up in the
// right byte.
inline u32 ConvertRGB5ToBGR8(u16 val)
{
    const u32 wide = val;

    const u32 red   = (wide & 0x001F) << 19; // bits 0-4   -> bits 19-23
    const u32 green = (wide & 0x03E0) << 6;  // bits 5-9   -> bits 11-15
    const u32 blue  = (wide & 0x7C00) >> 7;  // bits 10-14 -> bits 3-7

    return red | green | blue;
}
|
||||
// Expands a 15-bit colour to 6 bits per channel, one channel per byte
// (red -> byte 0, green -> byte 1, blue -> byte 2).
// A 5-bit value c becomes 2*c + 1, except that 0 stays 0, so full
// intensity (31) maps to 63.
inline u32 ConvertRGB5ToRGB6(u16 val)
{
    const auto expand = [](u32 channel5) -> u32
    {
        return channel5 ? (channel5 << 1) + 1 : 0;
    };

    const u32 red   = expand(val & 0x1F);
    const u32 green = expand((val >> 5) & 0x1F);
    const u32 blue  = expand((val >> 10) & 0x1F);

    return red | (green << 8) | (blue << 16);
}
|
||||
|
||||
template <int outputFmt>
|
||||
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
|
||||
{
|
||||
for (u32 i = 0; i < width*height; i++)
|
||||
{
|
||||
u16 value = *(u16*)&texData[i * 2];
|
||||
|
||||
switch (outputFmt)
|
||||
{
|
||||
case outputFmt_RGB6A5:
|
||||
output[i] = ConvertRGB5ToRGB6(value) | (value & 0x8000 ? 0x1F000000 : 0);
|
||||
break;
|
||||
case outputFmt_RGBA8:
|
||||
output[i] = ConvertRGB5ToRGB8(value) | (value & 0x8000 ? 0xFF000000 : 0);
|
||||
break;
|
||||
case outputFmt_BGRA8:
|
||||
output[i] = ConvertRGB5ToBGR8(value) | (value & 0x8000 ? 0xFF000000 : 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u8* texData);
|
||||
|
||||
template <int outputFmt>
|
||||
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData)
|
||||
{
|
||||
// we process a whole block at the time
|
||||
for (int y = 0; y < height / 4; y++)
|
||||
{
|
||||
for (int x = 0; x < width / 4; x++)
|
||||
{
|
||||
u32 data = ((u32*)texData)[x + y * (width / 4)];
|
||||
u16 auxData = ((u16*)texAuxData)[x + y * (width / 4)];
|
||||
|
||||
u32 paletteOffset = auxData & 0x3FFF;
|
||||
u16 color0 = palData[paletteOffset*2] | 0x8000;
|
||||
u16 color1 = palData[paletteOffset*2+1] | 0x8000;
|
||||
u16 color2, color3;
|
||||
|
||||
switch ((auxData >> 14) & 0x3)
|
||||
{
|
||||
case 0:
|
||||
color2 = palData[paletteOffset*2+2] | 0x8000;
|
||||
color3 = 0;
|
||||
break;
|
||||
case 1:
|
||||
{
|
||||
u32 r0 = color0 & 0x001F;
|
||||
u32 g0 = color0 & 0x03E0;
|
||||
u32 b0 = color0 & 0x7C00;
|
||||
u32 r1 = color1 & 0x001F;
|
||||
u32 g1 = color1 & 0x03E0;
|
||||
u32 b1 = color1 & 0x7C00;
|
||||
|
||||
u32 r = (r0 + r1) >> 1;
|
||||
u32 g = ((g0 + g1) >> 1) & 0x03E0;
|
||||
u32 b = ((b0 + b1) >> 1) & 0x7C00;
|
||||
color2 = r | g | b | 0x8000;
|
||||
}
|
||||
color3 = 0;
|
||||
break;
|
||||
case 2:
|
||||
color2 = palData[paletteOffset*2+2] | 0x8000;
|
||||
color3 = palData[paletteOffset*2+3] | 0x8000;
|
||||
break;
|
||||
case 3:
|
||||
{
|
||||
u32 r0 = color0 & 0x001F;
|
||||
u32 g0 = color0 & 0x03E0;
|
||||
u32 b0 = color0 & 0x7C00;
|
||||
u32 r1 = color1 & 0x001F;
|
||||
u32 g1 = color1 & 0x03E0;
|
||||
u32 b1 = color1 & 0x7C00;
|
||||
|
||||
u32 r = (r0*5 + r1*3) >> 3;
|
||||
u32 g = ((g0*5 + g1*3) >> 3) & 0x03E0;
|
||||
u32 b = ((b0*5 + b1*3) >> 3) & 0x7C00;
|
||||
|
||||
color2 = r | g | b | 0x8000;
|
||||
}
|
||||
{
|
||||
u32 r0 = color0 & 0x001F;
|
||||
u32 g0 = color0 & 0x03E0;
|
||||
u32 b0 = color0 & 0x7C00;
|
||||
u32 r1 = color1 & 0x001F;
|
||||
u32 g1 = color1 & 0x03E0;
|
||||
u32 b1 = color1 & 0x7C00;
|
||||
|
||||
u32 r = (r0*3 + r1*5) >> 3;
|
||||
u32 g = ((g0*3 + g1*5) >> 3) & 0x03E0;
|
||||
u32 b = ((b0*3 + b1*5) >> 3) & 0x7C00;
|
||||
|
||||
color3 = r | g | b | 0x8000;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// in 2020 our default data types are big enough to be used as lookup tables...
|
||||
u64 packed = color0 | ((u64)color1 << 16) | ((u64)color2 << 32) | ((u64)color3 << 48);
|
||||
|
||||
for (int j = 0; j < 4; j++)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
u16 color = (packed >> 16 * (data >> 2 * (i + j * 4))) & 0xFFFF;
|
||||
u32 res;
|
||||
switch (outputFmt)
|
||||
{
|
||||
case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color)
|
||||
| ((color & 0x8000) ? 0x1F000000 : 0); break;
|
||||
case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color)
|
||||
| ((color & 0x8000) ? 0xFF000000 : 0); break;
|
||||
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
|
||||
| ((color & 0x8000) ? 0xFF000000 : 0); break;
|
||||
}
|
||||
output[x * 4 + i + (y * 4 + j) * width] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u8*, u8*, u16*);
|
||||
|
||||
template <int outputFmt, int X, int Y>
|
||||
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
|
||||
{
|
||||
for (int y = 0; y < height; y++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u8 val = texData[x + y * width];
|
||||
|
||||
u32 idx = val & ((1 << Y) - 1);
|
||||
|
||||
u16 color = palData[idx];
|
||||
u32 alpha = (val >> Y) & ((1 << X) - 1);
|
||||
if (X != 5)
|
||||
alpha = alpha * 4 + alpha / 2;
|
||||
|
||||
u32 res;
|
||||
switch (outputFmt)
|
||||
{
|
||||
case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color) | alpha << 24; break;
|
||||
// make sure full alpha == 255
|
||||
case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color) | (alpha << 27 | (alpha & 0x1C) << 22); break;
|
||||
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color) | (alpha << 27 | (alpha & 0x1C) << 22); break;
|
||||
}
|
||||
output[x + y * width] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u8*, u16*);
|
||||
template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u8*, u16*);
|
||||
|
||||
template <int outputFmt, int colorBits>
|
||||
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
|
||||
{
|
||||
for (int y = 0; y < height; y++)
|
||||
{
|
||||
for (int x = 0; x < width / (8 / colorBits); x++)
|
||||
{
|
||||
u8 val = texData[x + y * (width / (8 / colorBits))];
|
||||
|
||||
for (int i = 0; i < 8 / colorBits; i++)
|
||||
{
|
||||
u32 index = (val >> (i * colorBits)) & ((1 << colorBits) - 1);
|
||||
u16 color = palData[index];
|
||||
|
||||
bool transparent = color0Transparent && index == 0;
|
||||
u32 res;
|
||||
switch (outputFmt)
|
||||
{
|
||||
case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color)
|
||||
| (transparent ? 0 : 0x1F000000); break;
|
||||
case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color)
|
||||
| (transparent ? 0 : 0xFF000000); break;
|
||||
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
|
||||
| (transparent ? 0 : 0xFF000000); break;
|
||||
}
|
||||
output[x * (8 / colorBits) + y * width + i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u8*, u16*, bool);
|
||||
template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u8*, u16*, bool);
|
||||
template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u8*, u16*, bool);
|
||||
|
||||
}
|
310
src/GPU3D_Texcache.h
Normal file
310
src/GPU3D_Texcache.h
Normal file
@ -0,0 +1,310 @@
|
||||
#ifndef GPU3D_TEXCACHE
|
||||
#define GPU3D_TEXCACHE
|
||||
|
||||
#include "types.h"
|
||||
#include "GPU.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY
|
||||
#include "xxhash/xxhash.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
// Returns the texture width in texels encoded in bits 20-22 of texparam:
// 8 shifted left by that 3-bit field, i.e. 8 to 1024.
inline u32 TextureWidth(u32 texparam)
{
    const u32 sizeField = (texparam >> 20) & 0x7;
    return 8u << sizeField;
}
|
||||
|
||||
// Returns the texture height in texels encoded in bits 23-25 of texparam:
// 8 shifted left by that 3-bit field, i.e. 8 to 1024.
inline u32 TextureHeight(u32 texparam)
{
    const u32 sizeField = (texparam >> 23) & 0x7;
    return 8u << sizeField;
}
|
||||
|
||||
// Texel layouts the texture converters can produce; used as the outputFmt
// template argument of the Convert*Texture functions.
enum
{
    outputFmt_RGB6A5 = 0, // 6-bit colour channels in bytes 0-2, 5-bit alpha in byte 3
    outputFmt_RGBA8  = 1, // 8 bits per channel
    outputFmt_BGRA8  = 2  // 8 bits per channel, converted with ConvertRGB5ToBGR8
};
|
||||
|
||||
template <int outputFmt>
|
||||
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData);
|
||||
template <int outputFmt>
|
||||
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData);
|
||||
template <int outputFmt, int X, int Y>
|
||||
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData);
|
||||
template <int outputFmt, int colorBits>
|
||||
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent);
|
||||
|
||||
template <typename TexLoaderT, typename TexHandleT>
|
||||
class Texcache
|
||||
{
|
||||
public:
|
||||
Texcache(const TexLoaderT& texloader)
|
||||
: TexLoader(texloader) // probably better if this would be a move constructor???
|
||||
{}
|
||||
|
||||
bool Update(GPU& gpu)
|
||||
{
|
||||
auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu);
|
||||
auto texPalDirty = gpu.VRAMDirty_TexPal.DeriveState(gpu.VRAMMap_TexPal, gpu);
|
||||
|
||||
bool textureChanged = gpu.MakeVRAMFlat_TextureCoherent(textureDirty);
|
||||
bool texPalChanged = gpu.MakeVRAMFlat_TexPalCoherent(texPalDirty);
|
||||
|
||||
if (textureChanged || texPalChanged)
|
||||
{
|
||||
//printf("check invalidation %d\n", TexCache.size());
|
||||
for (auto it = Cache.begin(); it != Cache.end();)
|
||||
{
|
||||
TexCacheEntry& entry = it->second;
|
||||
if (textureChanged)
|
||||
{
|
||||
for (u32 i = 0; i < 2; i++)
|
||||
{
|
||||
u32 startBit = entry.TextureRAMStart[i] / VRAMDirtyGranularity;
|
||||
u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
|
||||
|
||||
u32 startEntry = startBit >> 6;
|
||||
u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
|
||||
for (u32 j = startEntry; j < startEntry + entriesCount; j++)
|
||||
{
|
||||
if (GetRangedBitMask(j, startBit, bitsCount) & textureDirty.Data[j])
|
||||
{
|
||||
u64 newTexHash = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
|
||||
|
||||
if (newTexHash != entry.TextureHash[i])
|
||||
goto invalidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (texPalChanged && entry.TexPalSize > 0)
|
||||
{
|
||||
u32 startBit = entry.TexPalStart / VRAMDirtyGranularity;
|
||||
u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
|
||||
|
||||
u32 startEntry = startBit >> 6;
|
||||
u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
|
||||
for (u32 j = startEntry; j < startEntry + entriesCount; j++)
|
||||
{
|
||||
if (GetRangedBitMask(j, startBit, bitsCount) & texPalDirty.Data[j])
|
||||
{
|
||||
u64 newPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
|
||||
if (newPalHash != entry.TexPalHash)
|
||||
goto invalidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
it++;
|
||||
continue;
|
||||
invalidate:
|
||||
FreeTextures[entry.WidthLog2][entry.HeightLog2].push_back(entry.Texture);
|
||||
|
||||
//printf("invalidating texture %d\n", entry.ImageDescriptor);
|
||||
|
||||
it = Cache.erase(it);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void GetTexture(GPU& gpu, u32 texParam, u32 palBase, TexHandleT& textureHandle, u32& layer, u32*& helper)
|
||||
{
|
||||
// remove sampling and texcoord gen params
|
||||
texParam &= ~0xC00F0000;
|
||||
|
||||
u32 fmt = (texParam >> 26) & 0x7;
|
||||
u64 key = texParam;
|
||||
if (fmt != 7)
|
||||
{
|
||||
key |= (u64)palBase << 32;
|
||||
if (fmt == 5)
|
||||
key &= ~((u64)1 << 29);
|
||||
}
|
||||
//printf("%" PRIx64 " %" PRIx32 " %" PRIx32 "\n", key, texParam, palBase);
|
||||
|
||||
assert(fmt != 0 && "no texture is not a texture format!");
|
||||
|
||||
auto it = Cache.find(key);
|
||||
|
||||
if (it != Cache.end())
|
||||
{
|
||||
textureHandle = it->second.Texture.TextureID;
|
||||
layer = it->second.Texture.Layer;
|
||||
helper = &it->second.LastVariant;
|
||||
return;
|
||||
}
|
||||
|
||||
u32 widthLog2 = (texParam >> 20) & 0x7;
|
||||
u32 heightLog2 = (texParam >> 23) & 0x7;
|
||||
u32 width = 8 << widthLog2;
|
||||
u32 height = 8 << heightLog2;
|
||||
|
||||
u32 addr = (texParam & 0xFFFF) * 8;
|
||||
|
||||
TexCacheEntry entry = {0};
|
||||
|
||||
entry.TextureRAMStart[0] = addr;
|
||||
entry.WidthLog2 = widthLog2;
|
||||
entry.HeightLog2 = heightLog2;
|
||||
|
||||
// apparently a new texture
|
||||
if (fmt == 7)
|
||||
{
|
||||
entry.TextureRAMSize[0] = width*height*2;
|
||||
|
||||
ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, &gpu.VRAMFlat_Texture[addr]);
|
||||
}
|
||||
else if (fmt == 5)
|
||||
{
|
||||
u8* texData = &gpu.VRAMFlat_Texture[addr];
|
||||
u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1);
|
||||
if (addr >= 0x40000)
|
||||
slot1addr += 0x10000;
|
||||
u8* texAuxData = &gpu.VRAMFlat_Texture[slot1addr];
|
||||
|
||||
u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palBase*16);
|
||||
|
||||
entry.TextureRAMSize[0] = width*height/16*4;
|
||||
entry.TextureRAMStart[1] = slot1addr;
|
||||
entry.TextureRAMSize[1] = width*height/16*2;
|
||||
entry.TexPalStart = palBase*16;
|
||||
entry.TexPalSize = 0x10000;
|
||||
|
||||
ConvertCompressedTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, texData, texAuxData, palData);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 texSize, palAddr = palBase*16, numPalEntries;
|
||||
switch (fmt)
|
||||
{
|
||||
case 1: texSize = width*height; numPalEntries = 32; break;
|
||||
case 6: texSize = width*height; numPalEntries = 8; break;
|
||||
case 2: texSize = width*height/4; numPalEntries = 4; palAddr >>= 1; break;
|
||||
case 3: texSize = width*height/2; numPalEntries = 16; break;
|
||||
case 4: texSize = width*height; numPalEntries = 256; break;
|
||||
}
|
||||
|
||||
palAddr &= 0x1FFFF;
|
||||
|
||||
/*printf("creating texture | fmt: %d | %dx%d | %08x | %08x\n", fmt, width, height, addr, palAddr);
|
||||
svcSleepThread(1000*1000);*/
|
||||
|
||||
entry.TextureRAMSize[0] = texSize;
|
||||
entry.TexPalStart = palAddr;
|
||||
entry.TexPalSize = numPalEntries*2;
|
||||
|
||||
u8* texData = &gpu.VRAMFlat_Texture[addr];
|
||||
u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palAddr);
|
||||
|
||||
//assert(entry.TexPalStart+entry.TexPalSize <= 128*1024*1024);
|
||||
|
||||
bool color0Transparent = texParam & (1 << 29);
|
||||
|
||||
switch (fmt)
|
||||
{
|
||||
case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, DecodingBuffer, texData, palData); break;
|
||||
case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, DecodingBuffer, texData, palData); break;
|
||||
case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
|
||||
case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
|
||||
case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
if (entry.TextureRAMSize[i])
|
||||
entry.TextureHash[i] = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
|
||||
}
|
||||
if (entry.TexPalSize)
|
||||
entry.TexPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
|
||||
|
||||
auto& texArrays = TexArrays[widthLog2][heightLog2];
|
||||
auto& freeTextures = FreeTextures[widthLog2][heightLog2];
|
||||
|
||||
if (freeTextures.size() == 0)
|
||||
{
|
||||
texArrays.resize(texArrays.size()+1);
|
||||
TexHandleT& array = texArrays[texArrays.size()-1];
|
||||
|
||||
u32 layers = std::min<u32>((8*1024*1024) / (width*height*4), 64);
|
||||
|
||||
// allocate new array texture
|
||||
//printf("allocating new layer set for %d %d %d %d\n", width, height, texArrays.size()-1, array.ImageDescriptor);
|
||||
array = TexLoader.GenerateTexture(width, height, layers);
|
||||
|
||||
for (u32 i = 0; i < layers; i++)
|
||||
{
|
||||
freeTextures.push_back(TexArrayEntry{array, i});
|
||||
}
|
||||
}
|
||||
|
||||
TexArrayEntry storagePlace = freeTextures[freeTextures.size()-1];
|
||||
freeTextures.pop_back();
|
||||
|
||||
entry.Texture = storagePlace;
|
||||
|
||||
TexLoader.UploadTexture(storagePlace.TextureID, width, height, storagePlace.Layer, DecodingBuffer);
|
||||
//printf("using storage place %d %d | %d %d (%d)\n", width, height, storagePlace.TexArrayIdx, storagePlace.LayerIdx, array.ImageDescriptor);
|
||||
|
||||
textureHandle = storagePlace.TextureID;
|
||||
layer = storagePlace.Layer;
|
||||
helper = &Cache.emplace(std::make_pair(key, entry)).first->second.LastVariant;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
for (u32 i = 0; i < 8; i++)
|
||||
{
|
||||
for (u32 j = 0; j < 8; j++)
|
||||
{
|
||||
for (u32 k = 0; k < TexArrays[i][j].size(); k++)
|
||||
TexLoader.DeleteTexture(TexArrays[i][j][k]);
|
||||
TexArrays[i][j].clear();
|
||||
FreeTextures[i][j].clear();
|
||||
}
|
||||
}
|
||||
Cache.clear();
|
||||
}
|
||||
private:
|
||||
struct TexArrayEntry
|
||||
{
|
||||
TexHandleT TextureID;
|
||||
u32 Layer;
|
||||
};
|
||||
|
||||
struct TexCacheEntry
|
||||
{
|
||||
u32 LastVariant; // very cheap way to make variant lookup faster
|
||||
|
||||
u32 TextureRAMStart[2], TextureRAMSize[2];
|
||||
u32 TexPalStart, TexPalSize;
|
||||
u8 WidthLog2, HeightLog2;
|
||||
TexArrayEntry Texture;
|
||||
|
||||
u64 TextureHash[2];
|
||||
u64 TexPalHash;
|
||||
};
|
||||
std::unordered_map<u64, TexCacheEntry> Cache;
|
||||
|
||||
TexLoaderT TexLoader;
|
||||
|
||||
std::vector<TexArrayEntry> FreeTextures[8][8];
|
||||
std::vector<TexHandleT> TexArrays[8][8];
|
||||
|
||||
u32 DecodingBuffer[1024*1024];
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
29
src/GPU3D_TexcacheOpenGL.cpp
Normal file
29
src/GPU3D_TexcacheOpenGL.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include "GPU3D_TexcacheOpenGL.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
// Creates a 2D array texture with immutable GL_RGBA8UI storage of the given
// size and layer count (single mip level) and returns its name.
// The new texture is left bound to GL_TEXTURE_2D_ARRAY.
GLuint TexcacheOpenGLLoader::GenerateTexture(u32 width, u32 height, u32 layers)
{
    constexpr GLsizei mipLevels = 1;

    GLuint arrayTexture;
    glGenTextures(1, &arrayTexture);

    glBindTexture(GL_TEXTURE_2D_ARRAY, arrayTexture);
    glTexStorage3D(GL_TEXTURE_2D_ARRAY, mipLevels, GL_RGBA8UI, width, height, layers);

    return arrayTexture;
}
|
||||
|
||||
// Uploads width*height texels from `data` (four unsigned bytes per texel,
// integer format) into one layer of the array texture `handle`.
// The texture is left bound to GL_TEXTURE_2D_ARRAY.
void TexcacheOpenGLLoader::UploadTexture(GLuint handle, u32 width, u32 height, u32 layer, void* data)
{
    constexpr GLint mipLevel = 0;
    constexpr GLint xOffset = 0;
    constexpr GLint yOffset = 0;
    constexpr GLsizei layerCount = 1;

    glBindTexture(GL_TEXTURE_2D_ARRAY, handle);
    glTexSubImage3D(GL_TEXTURE_2D_ARRAY,
        mipLevel, xOffset, yOffset, layer,
        width, height, layerCount,
        GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, data);
}
|
||||
|
||||
// Deletes the texture object named by `handle`.
void TexcacheOpenGLLoader::DeleteTexture(GLuint handle)
{
    const GLuint textures[1] = { handle };
    glDeleteTextures(1, textures);
}
|
||||
|
||||
}
|
25
src/GPU3D_TexcacheOpenGL.h
Normal file
25
src/GPU3D_TexcacheOpenGL.h
Normal file
@ -0,0 +1,25 @@
|
||||
#ifndef GPU3D_TEXCACHEOPENGL
|
||||
#define GPU3D_TEXCACHEOPENGL
|
||||
|
||||
#include "GPU3D_Texcache.h"
|
||||
#include "OpenGLSupport.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
template <typename, typename>
|
||||
class Texcache;
|
||||
|
||||
class TexcacheOpenGLLoader
|
||||
{
|
||||
public:
|
||||
GLuint GenerateTexture(u32 width, u32 height, u32 layers);
|
||||
void UploadTexture(GLuint handle, u32 width, u32 height, u32 layer, void* data);
|
||||
void DeleteTexture(GLuint handle);
|
||||
};
|
||||
|
||||
using TexcacheOpenGL = Texcache<TexcacheOpenGLLoader, GLuint>;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -36,32 +36,27 @@ using namespace OpenGL;
|
||||
std::optional<GLCompositor> GLCompositor::New() noexcept
|
||||
{
|
||||
assert(glBindAttribLocation != nullptr);
|
||||
GLuint CompShader {};
|
||||
|
||||
std::array<GLuint, 3> CompShader {};
|
||||
if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Nearest, &CompShader[0], "CompositorShader"))
|
||||
return std::nullopt;
|
||||
|
||||
glBindAttribLocation(CompShader[2], 0, "vPosition");
|
||||
glBindAttribLocation(CompShader[2], 1, "vTexcoord");
|
||||
glBindFragDataLocation(CompShader[2], 0, "oColor");
|
||||
|
||||
if (!OpenGL::LinkShaderProgram(CompShader.data()))
|
||||
// OpenGL::LinkShaderProgram already deletes the shader program object
|
||||
// if linking the shaders together failed.
|
||||
if (!OpenGL::CompileVertexFragmentProgram(CompShader,
|
||||
kCompositorVS, kCompositorFS_Nearest,
|
||||
"CompositorShader",
|
||||
{{"vPosition", 0}, {"vTexcoord", 1}},
|
||||
{{"oColor", 0}}))
|
||||
return std::nullopt;
|
||||
|
||||
return { GLCompositor(CompShader) };
|
||||
}
|
||||
|
||||
GLCompositor::GLCompositor(std::array<GLuint, 3> compShader) noexcept : CompShader(compShader)
|
||||
GLCompositor::GLCompositor(GLuint compShader) noexcept : CompShader(compShader)
|
||||
{
|
||||
CompScaleLoc = glGetUniformLocation(CompShader[2], "u3DScale");
|
||||
Comp3DXPosLoc = glGetUniformLocation(CompShader[2], "u3DXPos");
|
||||
CompScaleLoc = glGetUniformLocation(CompShader, "u3DScale");
|
||||
Comp3DXPosLoc = glGetUniformLocation(CompShader, "u3DXPos");
|
||||
|
||||
glUseProgram(CompShader[2]);
|
||||
GLuint screenTextureUniform = glGetUniformLocation(CompShader[2], "ScreenTex");
|
||||
glUseProgram(CompShader);
|
||||
GLuint screenTextureUniform = glGetUniformLocation(CompShader, "ScreenTex");
|
||||
glUniform1i(screenTextureUniform, 0);
|
||||
GLuint _3dTextureUniform = glGetUniformLocation(CompShader[2], "_3DTex");
|
||||
GLuint _3dTextureUniform = glGetUniformLocation(CompShader, "_3DTex");
|
||||
glUniform1i(_3dTextureUniform, 1);
|
||||
|
||||
// all this mess is to prevent bleeding
|
||||
@ -136,7 +131,7 @@ GLCompositor::~GLCompositor()
|
||||
glDeleteVertexArrays(1, &CompVertexArrayID);
|
||||
glDeleteBuffers(1, &CompVertexBufferID);
|
||||
|
||||
OpenGL::DeleteShaderProgram(CompShader.data());
|
||||
glDeleteProgram(CompShader);
|
||||
}
|
||||
|
||||
|
||||
@ -174,7 +169,7 @@ GLCompositor& GLCompositor::operator=(GLCompositor&& other) noexcept
|
||||
CompVertices = other.CompVertices;
|
||||
|
||||
// Clean up these resources before overwriting them
|
||||
OpenGL::DeleteShaderProgram(CompShader.data());
|
||||
glDeleteProgram(CompShader);
|
||||
CompShader = other.CompShader;
|
||||
|
||||
glDeleteBuffers(1, &CompVertexBufferID);
|
||||
@ -244,11 +239,11 @@ void GLCompositor::Stop(const GPU& gpu) noexcept
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
}
|
||||
|
||||
void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept
|
||||
void GLCompositor::RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept
|
||||
{
|
||||
int frontbuf = gpu.FrontBuffer;
|
||||
int backbuf = gpu.FrontBuffer ^ 1;
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[frontbuf]);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[backbuf]);
|
||||
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glDisable(GL_STENCIL_TEST);
|
||||
@ -260,7 +255,7 @@ void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
// TODO: select more shaders (filtering, etc)
|
||||
OpenGL::UseShaderProgram(CompShader.data());
|
||||
glUseProgram(CompShader);
|
||||
glUniform1ui(CompScaleLoc, Scale);
|
||||
|
||||
// TODO: support setting this midframe, if ever needed
|
||||
@ -269,12 +264,12 @@ void GLCompositor::RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, CompScreenInputTex);
|
||||
|
||||
if (gpu.Framebuffer[frontbuf][0] && gpu.Framebuffer[frontbuf][1])
|
||||
if (gpu.Framebuffer[backbuf][0] && gpu.Framebuffer[backbuf][1])
|
||||
{
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 1, 192, GL_RGBA_INTEGER,
|
||||
GL_UNSIGNED_BYTE, gpu.Framebuffer[frontbuf][0].get());
|
||||
GL_UNSIGNED_BYTE, gpu.Framebuffer[backbuf][0].get());
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256*3 + 1, 192, GL_RGBA_INTEGER,
|
||||
GL_UNSIGNED_BYTE, gpu.Framebuffer[frontbuf][1].get());
|
||||
GL_UNSIGNED_BYTE, gpu.Framebuffer[backbuf][1].get());
|
||||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
|
@ -28,6 +28,7 @@ namespace melonDS
|
||||
class GPU;
|
||||
struct RenderSettings;
|
||||
class GLRenderer;
|
||||
class Renderer3D;
|
||||
class GLCompositor
|
||||
{
|
||||
public:
|
||||
@ -42,14 +43,14 @@ public:
|
||||
[[nodiscard]] int GetScaleFactor() const noexcept { return Scale; }
|
||||
|
||||
void Stop(const GPU& gpu) noexcept;
|
||||
void RenderFrame(const GPU& gpu, GLRenderer& renderer) noexcept;
|
||||
void RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept;
|
||||
void BindOutputTexture(int buf);
|
||||
private:
|
||||
GLCompositor(std::array<GLuint, 3> CompShader) noexcept;
|
||||
GLCompositor(GLuint CompShader) noexcept;
|
||||
int Scale = 0;
|
||||
int ScreenH = 0, ScreenW = 0;
|
||||
|
||||
std::array<GLuint, 3> CompShader {};
|
||||
GLuint CompShader {};
|
||||
GLuint CompScaleLoc = 0;
|
||||
GLuint Comp3DXPosLoc = 0;
|
||||
|
||||
|
@ -26,11 +26,38 @@
|
||||
#include <initializer_list>
|
||||
#include <algorithm>
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
// Returns the part of the bit range [startBit, startBit + bitsCount) that
// falls into 64-bit word number `idx` of a packed bit array, as a mask over
// that word.
//
// Words that do not overlap the range yield 0. Previously this held only for
// ranges contained in a single word; for ranges spanning several words an
// out-of-range idx returned an all-ones mask.
inline u64 GetRangedBitMask(u32 idx, u32 startBit, u32 bitsCount)
{
    constexpr u64 allOnes = 0xFFFFFFFFFFFFFFFF;

    const u32 endBit = startBit + bitsCount; // one past the last bit of the range
    const u32 startEntry = startBit >> 6;
    const u64 entriesCount = ((endBit + 0x3F) >> 6) - startEntry;

    if (idx < startEntry || idx >= startEntry + entriesCount)
        return 0;

    if (entriesCount == 1)
    {
        // the range starts and ends inside this word;
        // 1ULL << 64 would be undefined, hence the special case
        return bitsCount == 64
            ? allOnes
            : ((1ULL << bitsCount) - 1) << (startBit & 0x3F);
    }

    if (idx == startEntry)
        return allOnes << (startBit & 0x3F);

    // last word, and the range ends in the middle of it
    if ((endBit & 0x3F) && idx == startEntry + entriesCount - 1)
        return ~(allOnes << (endBit & 0x3F));

    return allOnes;
}
|
||||
|
||||
// like std::bitset but less stupid and optimised for
|
||||
// our use case (keeping track of memory invalidations)
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
template <u32 Size>
|
||||
struct NonStupidBitField
|
||||
{
|
||||
@ -166,6 +193,11 @@ struct NonStupidBitField
|
||||
return Ref{*this, idx};
|
||||
}
|
||||
|
||||
// Read-only access: returns whether bit `idx` is set.
bool operator[](u32 idx) const
{
    const u64 word = Data[idx >> 6];
    const u64 bit = 1ULL << (idx & 0x3F);
    return (word & bit) != 0;
}
|
||||
|
||||
void SetRange(u32 startBit, u32 bitsCount)
|
||||
{
|
||||
u32 startEntry = startBit >> 6;
|
||||
@ -187,6 +219,26 @@ struct NonStupidBitField
|
||||
}
|
||||
}
|
||||
|
||||
int Min() const
|
||||
{
|
||||
for (int i = 0; i < DataLength; i++)
|
||||
{
|
||||
if (Data[i])
|
||||
return i * 64 + __builtin_ctzll(Data[i]);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int Max() const
|
||||
{
|
||||
for (int i = DataLength - 1; i >= 0; i--)
|
||||
{
|
||||
if (Data[i])
|
||||
return i * 64 + (63 - __builtin_clzll(Data[i]));
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
|
||||
{
|
||||
for (u32 i = 0; i < DataLength; i++)
|
||||
@ -195,6 +247,7 @@ struct NonStupidBitField
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
|
||||
{
|
||||
for (u32 i = 0; i < DataLength; i++)
|
||||
@ -203,6 +256,20 @@ struct NonStupidBitField
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator bool() const
|
||||
{
|
||||
for (int i = 0; i < DataLength - 1; i++)
|
||||
{
|
||||
if (Data[i])
|
||||
return true;
|
||||
}
|
||||
if (Data[DataLength-1] & ((Size&0x3F) ? ~(0xFFFFFFFFFFFFFFFF << (Size&0x3F)) : 0xFFFFFFFFFFFFFFFF))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -18,6 +18,14 @@
|
||||
|
||||
#include "OpenGLSupport.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY
|
||||
#include "xxhash/xxhash.h"
|
||||
|
||||
namespace melonDS
|
||||
{
|
||||
|
||||
@ -27,9 +35,158 @@ using Platform::LogLevel;
|
||||
namespace OpenGL
|
||||
{
|
||||
|
||||
bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name)
|
||||
// One cached program binary. Owns the Data buffer: it must have been
// allocated with new[] and is released with delete[] in the destructor.
// Move-only; a moved-from entry holds a null Data pointer and zero Length
// and BinaryFormat.
struct ShaderCacheEntry
{
    u32 Length;       // size of Data in bytes
    u8* Data;         // binary blob, owned by this entry
    u32 BinaryFormat; // format value stored alongside the binary

    ShaderCacheEntry(u8* data, u32 length, u32 binaryFmt)
        : Length(length), Data(data), BinaryFormat(binaryFmt)
    {
        assert(data != nullptr);
    }

    // copying would lead to a double delete[] of Data
    ShaderCacheEntry(const ShaderCacheEntry&) = delete;
    ShaderCacheEntry& operator=(const ShaderCacheEntry&) = delete;

    ShaderCacheEntry(ShaderCacheEntry&& other) noexcept
        : Length(other.Length), Data(other.Data), BinaryFormat(other.BinaryFormat)
    {
        // leave the source empty so its destructor releases nothing
        other.Data = nullptr;
        other.Length = 0;
        other.BinaryFormat = 0;
    }
    ShaderCacheEntry& operator=(ShaderCacheEntry&&) = delete;

    ~ShaderCacheEntry()
    {
        // Data is null after a move; delete[] on a null pointer is a no-op
        delete[] Data;
    }
};
|
||||
|
||||
std::unordered_map<u64, ShaderCacheEntry> ShaderCache;
|
||||
std::vector<u64> NewShaders;
|
||||
|
||||
constexpr u32 ShaderCacheMagic = 0x11CAC4E1;
|
||||
constexpr u32 ShaderCacheVersion = 1;
|
||||
|
||||
void LoadShaderCache()
|
||||
{
|
||||
// for now the shader cache only contains only compute shaders
|
||||
// because they take the longest to compile
|
||||
Platform::FileHandle* file = Platform::OpenLocalFile("shadercache", Platform::FileMode::Read);
|
||||
if (file == nullptr)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not find shader cache\n");
|
||||
return;
|
||||
}
|
||||
|
||||
u32 magic, version, numPrograms;
|
||||
if (Platform::FileRead(&magic, 4, 1, file) != 1 || magic != ShaderCacheMagic)
|
||||
{
|
||||
Log(LogLevel::Error, "Shader cache file has invalid magic\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
if (Platform::FileRead(&version, 4, 1, file) != 1 || version != ShaderCacheVersion)
|
||||
{
|
||||
Log(LogLevel::Error, "Shader cache file has bad version\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
if (Platform::FileRead(&numPrograms, 4, 1, file) != 1)
|
||||
{
|
||||
Log(LogLevel::Error, "Shader cache file invalid program count\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
// not the best approach, because once changes pile up
|
||||
// we read and overwrite the old files
|
||||
for (u32 i = 0; i < numPrograms; i++)
|
||||
{
|
||||
int error = 3;
|
||||
|
||||
u32 length, binaryFormat;
|
||||
u64 sourceHash;
|
||||
error -= Platform::FileRead(&sourceHash, 8, 1, file);
|
||||
error -= Platform::FileRead(&length, 4, 1, file);
|
||||
error -= Platform::FileRead(&binaryFormat, 4, 1, file);
|
||||
|
||||
if (error != 0)
|
||||
{
|
||||
Log(LogLevel::Error, "Invalid shader cache entry\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
u8* data = new u8[length];
|
||||
if (Platform::FileRead(data, length, 1, file) != 1)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not read shader cache entry data\n");
|
||||
delete[] data;
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
ShaderCache.erase(sourceHash);
|
||||
ShaderCache.emplace(sourceHash, ShaderCacheEntry(data, length, binaryFormat));
|
||||
}
|
||||
|
||||
fileInvalid:
|
||||
Platform::CloseFile(file);
|
||||
}
|
||||
|
||||
void SaveShaderCache()
|
||||
{
|
||||
Platform::FileHandle* file = Platform::OpenLocalFile("shadercache", Platform::FileMode::ReadWrite);
|
||||
|
||||
if (file == nullptr)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not open or create shader cache file\n");
|
||||
return;
|
||||
}
|
||||
|
||||
int written = 3;
|
||||
u32 magic = ShaderCacheMagic, version = ShaderCacheVersion, numPrograms = ShaderCache.size();
|
||||
written -= Platform::FileWrite(&magic, 4, 1, file);
|
||||
written -= Platform::FileWrite(&version, 4, 1, file);
|
||||
written -= Platform::FileWrite(&numPrograms, 4, 1, file);
|
||||
|
||||
if (written != 0)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not write shader cache header\n");
|
||||
goto writeError;
|
||||
}
|
||||
|
||||
Platform::FileSeek(file, 0, Platform::FileSeekOrigin::End);
|
||||
|
||||
printf("new shaders %d\n", NewShaders.size());
|
||||
|
||||
for (u64 newShader : NewShaders)
|
||||
{
|
||||
int error = 4;
|
||||
auto it = ShaderCache.find(newShader);
|
||||
|
||||
error -= Platform::FileWrite(&it->first, 8, 1, file);
|
||||
error -= Platform::FileWrite(&it->second.Length, 4, 1, file);
|
||||
error -= Platform::FileWrite(&it->second.BinaryFormat, 4, 1, file);
|
||||
error -= Platform::FileWrite(it->second.Data, it->second.Length, 1, file);
|
||||
|
||||
if (error != 0)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not insert new shader cache entry\n");
|
||||
goto writeError;
|
||||
}
|
||||
}
|
||||
|
||||
writeError:
|
||||
Platform::CloseFile(file);
|
||||
|
||||
NewShaders.clear();
|
||||
}
|
||||
|
||||
bool CompilerShader(GLuint& id, const std::string& source, const std::string& name, const std::string& type)
|
||||
{
|
||||
int len;
|
||||
int res;
|
||||
|
||||
if (!glCreateShader)
|
||||
@ -38,61 +195,32 @@ bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char*
|
||||
return false;
|
||||
}
|
||||
|
||||
ids[0] = glCreateShader(GL_VERTEX_SHADER);
|
||||
len = strlen(vs);
|
||||
glShaderSource(ids[0], 1, &vs, &len);
|
||||
glCompileShader(ids[0]);
|
||||
const char* sourceC = source.c_str();
|
||||
int len = source.length();
|
||||
glShaderSource(id, 1, &sourceC, &len);
|
||||
|
||||
glGetShaderiv(ids[0], GL_COMPILE_STATUS, &res);
|
||||
glCompileShader(id);
|
||||
|
||||
glGetShaderiv(id, GL_COMPILE_STATUS, &res);
|
||||
if (res != GL_TRUE)
|
||||
{
|
||||
glGetShaderiv(ids[0], GL_INFO_LOG_LENGTH, &res);
|
||||
glGetShaderiv(id, GL_INFO_LOG_LENGTH, &res);
|
||||
if (res < 1) res = 1024;
|
||||
char* log = new char[res+1];
|
||||
glGetShaderInfoLog(ids[0], res+1, NULL, log);
|
||||
Log(LogLevel::Error, "OpenGL: failed to compile vertex shader %s: %s\n", name, log);
|
||||
Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", vs);
|
||||
glGetShaderInfoLog(id, res+1, NULL, log);
|
||||
Log(LogLevel::Error, "OpenGL: failed to compile %s shader %s: %s\n", type.c_str(), name.c_str(), log);
|
||||
Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", source.c_str());
|
||||
delete[] log;
|
||||
|
||||
glDeleteShader(ids[0]);
|
||||
glDeleteShader(id);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ids[1] = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
len = strlen(fs);
|
||||
glShaderSource(ids[1], 1, &fs, &len);
|
||||
glCompileShader(ids[1]);
|
||||
|
||||
glGetShaderiv(ids[1], GL_COMPILE_STATUS, &res);
|
||||
if (res != GL_TRUE)
|
||||
{
|
||||
glGetShaderiv(ids[1], GL_INFO_LOG_LENGTH, &res);
|
||||
if (res < 1) res = 1024;
|
||||
char* log = new char[res+1];
|
||||
glGetShaderInfoLog(ids[1], res+1, NULL, log);
|
||||
Log(LogLevel::Error, "OpenGL: failed to compile fragment shader %s: %s\n", name, log);
|
||||
//printf("shader source:\n--\n%s\n--\n", fs);
|
||||
delete[] log;
|
||||
|
||||
Platform::FileHandle* logf = Platform::OpenFile("shaderfail.log", Platform::FileMode::WriteText);
|
||||
Platform::FileWrite(fs, len+1, 1, logf);
|
||||
Platform::CloseFile(logf);
|
||||
|
||||
glDeleteShader(ids[0]);
|
||||
glDeleteShader(ids[1]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ids[2] = glCreateProgram();
|
||||
glAttachShader(ids[2], ids[0]);
|
||||
glAttachShader(ids[2], ids[1]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LinkShaderProgram(GLuint* ids)
|
||||
bool LinkProgram(GLuint& result, GLuint* ids, int numIds)
|
||||
{
|
||||
int res;
|
||||
|
||||
@ -102,46 +230,132 @@ bool LinkShaderProgram(GLuint* ids)
|
||||
return false;
|
||||
}
|
||||
|
||||
glLinkProgram(ids[2]);
|
||||
for (int i = 0; i < numIds; i++)
|
||||
{
|
||||
glAttachShader(result, ids[i]);
|
||||
}
|
||||
|
||||
glDetachShader(ids[2], ids[0]);
|
||||
glDetachShader(ids[2], ids[1]);
|
||||
glLinkProgram(result);
|
||||
|
||||
glDeleteShader(ids[0]);
|
||||
glDeleteShader(ids[1]);
|
||||
for (int i = 0; i < numIds; i++)
|
||||
glDetachShader(result, ids[i]);
|
||||
|
||||
glGetProgramiv(ids[2], GL_LINK_STATUS, &res);
|
||||
glGetProgramiv(result, GL_LINK_STATUS, &res);
|
||||
if (res != GL_TRUE)
|
||||
{
|
||||
glGetProgramiv(ids[2], GL_INFO_LOG_LENGTH, &res);
|
||||
glGetProgramiv(result, GL_INFO_LOG_LENGTH, &res);
|
||||
if (res < 1) res = 1024;
|
||||
char* log = new char[res+1];
|
||||
glGetProgramInfoLog(ids[2], res+1, NULL, log);
|
||||
glGetProgramInfoLog(result, res+1, NULL, log);
|
||||
Log(LogLevel::Error, "OpenGL: failed to link shader program: %s\n", log);
|
||||
delete[] log;
|
||||
|
||||
glDeleteProgram(ids[2]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void DeleteShaderProgram(GLuint* ids)
|
||||
bool CompileComputeProgram(GLuint& result, const std::string& source, const std::string& name)
|
||||
{
|
||||
if (glDeleteProgram)
|
||||
{ // If OpenGL isn't loaded, then there's no shader program to delete
|
||||
glDeleteProgram(ids[2]);
|
||||
result = glCreateProgram();
|
||||
|
||||
/*u64 sourceHash = XXH64(source.data(), source.size(), 0);
|
||||
auto it = ShaderCache.find(sourceHash);
|
||||
if (it != ShaderCache.end())
|
||||
{
|
||||
glProgramBinary(result, it->second.BinaryFormat, it->second.Data, it->second.Length);
|
||||
|
||||
GLint linkStatus;
|
||||
glGetProgramiv(result, GL_LINK_STATUS, &linkStatus);
|
||||
if (linkStatus == GL_TRUE)
|
||||
{
|
||||
Log(LogLevel::Info, "Restored shader %s from cache\n", name.c_str());
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
}
|
||||
}*/
|
||||
Log(LogLevel::Error, "Shader %s from cache was rejected\n", name.c_str());
|
||||
|
||||
GLuint shader;
|
||||
bool linkingSucess = false;
|
||||
|
||||
if (!glCreateShader || !glDeleteShader)
|
||||
goto error;
|
||||
|
||||
shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||
|
||||
if (!CompilerShader(shader, source, name, "compute"))
|
||||
goto error;
|
||||
|
||||
linkingSucess = LinkProgram(result, &shader, 1);
|
||||
|
||||
error:
|
||||
glDeleteShader(shader);
|
||||
|
||||
if (!linkingSucess)
|
||||
{
|
||||
glDeleteProgram(result);
|
||||
}
|
||||
/*else
|
||||
{
|
||||
GLint length;
|
||||
GLenum format;
|
||||
glGetProgramiv(result, GL_PROGRAM_BINARY_LENGTH, &length);
|
||||
|
||||
u8* buffer = new u8[length];
|
||||
glGetProgramBinary(result, length, nullptr, &format, buffer);
|
||||
|
||||
ShaderCache.emplace(sourceHash, ShaderCacheEntry(buffer, length, format));
|
||||
NewShaders.push_back(sourceHash);
|
||||
}*/
|
||||
|
||||
return linkingSucess;
|
||||
}
|
||||
|
||||
void UseShaderProgram(GLuint* ids)
|
||||
bool CompileVertexFragmentProgram(GLuint& result,
|
||||
const std::string& vs, const std::string& fs,
|
||||
const std::string& name,
|
||||
const std::initializer_list<AttributeTarget>& vertexInAttrs,
|
||||
const std::initializer_list<AttributeTarget>& fragmentOutAttrs)
|
||||
{
|
||||
if (glUseProgram)
|
||||
{ // If OpenGL isn't loaded, then there's no shader program to use
|
||||
glUseProgram(ids[2]);
|
||||
GLuint shaders[2] =
|
||||
{
|
||||
glCreateShader(GL_VERTEX_SHADER),
|
||||
glCreateShader(GL_FRAGMENT_SHADER)
|
||||
};
|
||||
result = glCreateProgram();
|
||||
|
||||
bool linkingSucess = false;
|
||||
|
||||
if (!CompilerShader(shaders[0], vs, name, "vertex"))
|
||||
goto error;
|
||||
|
||||
if (!CompilerShader(shaders[1], fs, name, "fragment"))
|
||||
goto error;
|
||||
|
||||
|
||||
for (const AttributeTarget& target : vertexInAttrs)
|
||||
{
|
||||
glBindAttribLocation(result, target.Location, target.Name);
|
||||
}
|
||||
for (const AttributeTarget& target : fragmentOutAttrs)
|
||||
{
|
||||
glBindFragDataLocation(result, target.Location, target.Name);
|
||||
}
|
||||
|
||||
linkingSucess = LinkProgram(result, shaders, 2);
|
||||
|
||||
error:
|
||||
glDeleteShader(shaders[1]);
|
||||
glDeleteShader(shaders[0]);
|
||||
|
||||
if (!linkingSucess)
|
||||
glDeleteProgram(result);
|
||||
|
||||
return linkingSucess;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -28,10 +28,23 @@
|
||||
namespace melonDS::OpenGL
|
||||
{
|
||||
|
||||
bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name);
|
||||
bool LinkShaderProgram(GLuint* ids);
|
||||
void DeleteShaderProgram(GLuint* ids);
|
||||
void UseShaderProgram(GLuint* ids);
|
||||
void LoadShaderCache();
|
||||
void SaveShaderCache();
|
||||
|
||||
// Pairs a shader variable name with the location index it should be bound to.
// CompileVertexFragmentProgram passes these to glBindAttribLocation (vertex
// inputs) and glBindFragDataLocation (fragment outputs) before linking.
struct AttributeTarget
|
||||
{
|
||||
const char* Name; // name of the variable in the shader source
|
||||
u32 Location; // location index to bind the variable to
|
||||
};
|
||||
|
||||
|
||||
bool CompileVertexFragmentProgram(GLuint& result,
|
||||
const std::string& vs, const std::string& fs,
|
||||
const std::string& name,
|
||||
const std::initializer_list<AttributeTarget>& vertexInAttrs,
|
||||
const std::initializer_list<AttributeTarget>& fragmentOutAttrs);
|
||||
|
||||
bool CompileComputeProgram(GLuint& result, const std::string& source, const std::string& name);
|
||||
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <inttypes.h>
|
||||
#include "Platform.h"
|
||||
#include "Config.h"
|
||||
#include "GPU.h"
|
||||
|
||||
|
||||
namespace Config
|
||||
@ -59,6 +60,7 @@ bool Threaded3D;
|
||||
|
||||
int GL_ScaleFactor;
|
||||
bool GL_BetterPolygons;
|
||||
bool GL_HiresCoordinates;
|
||||
|
||||
bool LimitFPS;
|
||||
int MaxFPS;
|
||||
@ -246,11 +248,12 @@ ConfigEntry ConfigFile[] =
|
||||
{"ScreenVSync", 1, &ScreenVSync, false, false},
|
||||
{"ScreenVSyncInterval", 0, &ScreenVSyncInterval, 1, false},
|
||||
|
||||
{"3DRenderer", 0, &_3DRenderer, 0, false},
|
||||
{"3DRenderer", 0, &_3DRenderer, renderer3D_Software, false},
|
||||
{"Threaded3D", 1, &Threaded3D, true, false},
|
||||
|
||||
{"GL_ScaleFactor", 0, &GL_ScaleFactor, 1, false},
|
||||
{"GL_BetterPolygons", 1, &GL_BetterPolygons, false, false},
|
||||
{"GL_HiresCoordinates", 1, &GL_HiresCoordinates, true, false},
|
||||
|
||||
{"LimitFPS", 1, &LimitFPS, true, false},
|
||||
{"MaxFPS", 0, &MaxFPS, 1000, false},
|
||||
|
@ -51,6 +51,16 @@ enum
|
||||
micInputType_MAX,
|
||||
};
|
||||
|
||||
// Identifiers of the selectable 3D renderers; these are the values stored in
// Config::_3DRenderer. The OpenGL entries only exist when the build defines
// OGLRENDERER_ENABLED.
enum
|
||||
{
|
||||
renderer3D_Software = 0,
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
renderer3D_OpenGL,
|
||||
renderer3D_OpenGLCompute,
|
||||
#endif
|
||||
renderer3D_Max, // one past the last renderer id
|
||||
};
|
||||
|
||||
namespace Config
|
||||
{
|
||||
|
||||
@ -103,6 +113,7 @@ extern bool Threaded3D;
|
||||
|
||||
extern int GL_ScaleFactor;
|
||||
extern bool GL_BetterPolygons;
|
||||
extern bool GL_HiresCoordinates;
|
||||
|
||||
extern bool LimitFPS;
|
||||
extern int MaxFPS;
|
||||
|
@ -52,10 +52,12 @@
|
||||
#include "DSi_I2C.h"
|
||||
#include "GPU3D_Soft.h"
|
||||
#include "GPU3D_OpenGL.h"
|
||||
#include "GPU3D_Compute.h"
|
||||
|
||||
#include "Savestate.h"
|
||||
|
||||
#include "ROMManager.h"
|
||||
#include "EmuThread.h"
|
||||
//#include "ArchiveUtil.h"
|
||||
//#include "CameraManager.h"
|
||||
|
||||
@ -94,9 +96,8 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent)
|
||||
}
|
||||
|
||||
std::unique_ptr<NDS> EmuThread::CreateConsole(
|
||||
std::unique_ptr<melonDS::NDSCart::CartCommon>&& ndscart,
|
||||
std::unique_ptr<melonDS::GBACart::CartCommon>&& gbacart
|
||||
) noexcept
|
||||
std::unique_ptr<melonDS::NDSCart::CartCommon> &&ndscart,
|
||||
std::unique_ptr<melonDS::GBACart::CartCommon> &&gbacart) noexcept
|
||||
{
|
||||
auto arm7bios = ROMManager::LoadARM7BIOS();
|
||||
if (!arm7bios)
|
||||
@ -326,21 +327,12 @@ void EmuThread::run()
|
||||
videoRenderer = 0;
|
||||
}
|
||||
|
||||
if (videoRenderer == 0)
|
||||
{ // If we're using the software renderer...
|
||||
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>(Config::Threaded3D != 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto glrenderer = melonDS::GLRenderer::New();
|
||||
glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
|
||||
NDS->GPU.SetRenderer3D(std::move(glrenderer));
|
||||
}
|
||||
updateRenderer();
|
||||
|
||||
Input::Init();
|
||||
|
||||
u32 nframes = 0;
|
||||
double perfCountsSec = 1.0 / SDL_GetPerformanceFrequency();
|
||||
perfCountsSec = 1.0 / SDL_GetPerformanceFrequency();
|
||||
double lastTime = SDL_GetPerformanceCounter() * perfCountsSec;
|
||||
double frameLimitError = 0.0;
|
||||
double lastMeasureTime = lastTime;
|
||||
@ -451,20 +443,9 @@ void EmuThread::run()
|
||||
videoRenderer = 0;
|
||||
}
|
||||
|
||||
videoRenderer = screenGL ? Config::_3DRenderer : 0;
|
||||
updateRenderer();
|
||||
|
||||
videoSettingsDirty = false;
|
||||
|
||||
if (videoRenderer == 0)
|
||||
{ // If we're using the software renderer...
|
||||
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>(Config::Threaded3D != 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto glrenderer = melonDS::GLRenderer::New();
|
||||
glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
|
||||
NDS->GPU.SetRenderer3D(std::move(glrenderer));
|
||||
}
|
||||
}
|
||||
|
||||
// process input and hotkeys
|
||||
@ -512,7 +493,16 @@ void EmuThread::run()
|
||||
|
||||
|
||||
// emulate
|
||||
u32 nlines = NDS->RunFrame();
|
||||
u32 nlines;
|
||||
if (NDS->GPU.GetRenderer3D().NeedsShaderCompile())
|
||||
{
|
||||
compileShaders();
|
||||
nlines = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
nlines = NDS->RunFrame();
|
||||
}
|
||||
|
||||
if (ROMManager::NDSSave)
|
||||
ROMManager::NDSSave->CheckFlush();
|
||||
@ -750,3 +740,53 @@ bool EmuThread::emuIsActive()
|
||||
{
|
||||
return (RunningSomething == 1);
|
||||
}
|
||||
|
||||
void EmuThread::updateRenderer()
|
||||
{
|
||||
if (videoRenderer != lastVideoRenderer)
|
||||
{
|
||||
printf("creating renderer %d\n", videoRenderer);
|
||||
switch (videoRenderer)
|
||||
{
|
||||
case renderer3D_Software:
|
||||
NDS->GPU.SetRenderer3D(std::make_unique<SoftRenderer>());
|
||||
break;
|
||||
case renderer3D_OpenGL:
|
||||
NDS->GPU.SetRenderer3D(GLRenderer::New());
|
||||
break;
|
||||
case renderer3D_OpenGLCompute:
|
||||
NDS->GPU.SetRenderer3D(ComputeRenderer::New());
|
||||
break;
|
||||
default: __builtin_unreachable();
|
||||
}
|
||||
}
|
||||
lastVideoRenderer = videoRenderer;
|
||||
|
||||
switch (videoRenderer)
|
||||
{
|
||||
case renderer3D_Software:
|
||||
static_cast<SoftRenderer&>(NDS->GPU.GetRenderer3D()).SetThreaded(Config::Threaded3D, NDS->GPU);
|
||||
break;
|
||||
case renderer3D_OpenGL:
|
||||
static_cast<GLRenderer&>(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor);
|
||||
break;
|
||||
case renderer3D_OpenGLCompute:
|
||||
static_cast<ComputeRenderer&>(NDS->GPU.GetRenderer3D()).SetRenderSettings(Config::GL_ScaleFactor, Config::GL_HiresCoordinates);
|
||||
break;
|
||||
default: __builtin_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
void EmuThread::compileShaders()
|
||||
{
|
||||
int currentShader, shadersCount;
|
||||
u64 startTime = SDL_GetPerformanceCounter();
|
||||
// kind of hacky to look at the wallclock, though it is easier than
|
||||
// than disabling vsync
|
||||
do
|
||||
{
|
||||
NDS->GPU.GetRenderer3D().ShaderCompileStep(currentShader, shadersCount);
|
||||
} while (NDS->GPU.GetRenderer3D().NeedsShaderCompile() &&
|
||||
(SDL_GetPerformanceCounter() - startTime) * perfCountsSec < 1.0 / 6.0);
|
||||
mainWindow->osdAddMessage(0, "Compiling shader %d/%d", currentShader+1, shadersCount);
|
||||
}
|
||||
|
@ -94,6 +94,9 @@ signals:
|
||||
void syncVolumeLevel();
|
||||
|
||||
private:
|
||||
void updateRenderer();
|
||||
void compileShaders();
|
||||
|
||||
std::unique_ptr<melonDS::NDS> CreateConsole(
|
||||
std::unique_ptr<melonDS::NDSCart::CartCommon>&& ndscart,
|
||||
std::unique_ptr<melonDS::GBACart::CartCommon>&& gbacart
|
||||
@ -127,8 +130,9 @@ private:
|
||||
|
||||
int autoScreenSizing;
|
||||
|
||||
int videoRenderer;
|
||||
bool videoSettingsDirty;
|
||||
int lastVideoRenderer = -1;
|
||||
|
||||
double perfCountsSec;
|
||||
};
|
||||
|
||||
#endif // EMUTHREAD_H
|
||||
|
@ -709,19 +709,17 @@ void ScreenPanelGL::initOpenGL()
|
||||
|
||||
glContext->MakeCurrent();
|
||||
|
||||
OpenGL::BuildShaderProgram(kScreenVS, kScreenFS, screenShaderProgram, "ScreenShader");
|
||||
GLuint pid = screenShaderProgram[2];
|
||||
glBindAttribLocation(pid, 0, "vPosition");
|
||||
glBindAttribLocation(pid, 1, "vTexcoord");
|
||||
glBindFragDataLocation(pid, 0, "oColor");
|
||||
OpenGL::CompileVertexFragmentProgram(screenShaderProgram,
|
||||
kScreenVS, kScreenFS,
|
||||
"ScreenShader",
|
||||
{{"vPosition", 0}, {"vTexcoord", 1}},
|
||||
{{"oColor", 0}});
|
||||
|
||||
OpenGL::LinkShaderProgram(screenShaderProgram);
|
||||
glUseProgram(screenShaderProgram);
|
||||
glUniform1i(glGetUniformLocation(screenShaderProgram, "ScreenTex"), 0);
|
||||
|
||||
glUseProgram(pid);
|
||||
glUniform1i(glGetUniformLocation(pid, "ScreenTex"), 0);
|
||||
|
||||
screenShaderScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize");
|
||||
screenShaderTransformULoc = glGetUniformLocation(pid, "uTransform");
|
||||
screenShaderScreenSizeULoc = glGetUniformLocation(screenShaderProgram, "uScreenSize");
|
||||
screenShaderTransformULoc = glGetUniformLocation(screenShaderProgram, "uTransform");
|
||||
|
||||
// to prevent bleeding between both parts of the screen
|
||||
// with bilinear filtering enabled
|
||||
@ -769,21 +767,19 @@ void ScreenPanelGL::initOpenGL()
|
||||
memset(zeroData, 0, sizeof(zeroData));
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 2, GL_RGBA, GL_UNSIGNED_BYTE, zeroData);
|
||||
|
||||
OpenGL::CompileVertexFragmentProgram(osdShader,
|
||||
kScreenVS_OSD, kScreenFS_OSD,
|
||||
"OSDShader",
|
||||
{{"vPosition", 0}},
|
||||
{{"oColor", 0}});
|
||||
|
||||
OpenGL::BuildShaderProgram(kScreenVS_OSD, kScreenFS_OSD, osdShader, "OSDShader");
|
||||
glUseProgram(osdShader);
|
||||
glUniform1i(glGetUniformLocation(osdShader, "OSDTex"), 0);
|
||||
|
||||
pid = osdShader[2];
|
||||
glBindAttribLocation(pid, 0, "vPosition");
|
||||
glBindFragDataLocation(pid, 0, "oColor");
|
||||
|
||||
OpenGL::LinkShaderProgram(osdShader);
|
||||
glUseProgram(pid);
|
||||
glUniform1i(glGetUniformLocation(pid, "OSDTex"), 0);
|
||||
|
||||
osdScreenSizeULoc = glGetUniformLocation(pid, "uScreenSize");
|
||||
osdPosULoc = glGetUniformLocation(pid, "uOSDPos");
|
||||
osdSizeULoc = glGetUniformLocation(pid, "uOSDSize");
|
||||
osdScaleFactorULoc = glGetUniformLocation(pid, "uScaleFactor");
|
||||
osdScreenSizeULoc = glGetUniformLocation(osdShader, "uScreenSize");
|
||||
osdPosULoc = glGetUniformLocation(osdShader, "uOSDPos");
|
||||
osdSizeULoc = glGetUniformLocation(osdShader, "uOSDSize");
|
||||
osdScaleFactorULoc = glGetUniformLocation(osdShader, "uScaleFactor");
|
||||
|
||||
const float osdvertices[6*2] =
|
||||
{
|
||||
@ -818,8 +814,7 @@ void ScreenPanelGL::deinitOpenGL()
|
||||
glDeleteVertexArrays(1, &screenVertexArray);
|
||||
glDeleteBuffers(1, &screenVertexBuffer);
|
||||
|
||||
OpenGL::DeleteShaderProgram(screenShaderProgram);
|
||||
|
||||
glDeleteProgram(screenShaderProgram);
|
||||
|
||||
for (const auto& [key, tex] : osdTextures)
|
||||
{
|
||||
@ -830,8 +825,7 @@ void ScreenPanelGL::deinitOpenGL()
|
||||
glDeleteVertexArrays(1, &osdVertexArray);
|
||||
glDeleteBuffers(1, &osdVertexBuffer);
|
||||
|
||||
OpenGL::DeleteShaderProgram(osdShader);
|
||||
|
||||
glDeleteProgram(osdShader);
|
||||
|
||||
glContext->DoneCurrent();
|
||||
|
||||
@ -885,7 +879,7 @@ void ScreenPanelGL::drawScreenGL()
|
||||
|
||||
glViewport(0, 0, w, h);
|
||||
|
||||
glUseProgram(screenShaderProgram[2]);
|
||||
glUseProgram(screenShaderProgram);
|
||||
glUniform2f(screenShaderScreenSizeULoc, w / factor, h / factor);
|
||||
|
||||
int frontbuf = emuThread->FrontBuffer;
|
||||
@ -895,7 +889,7 @@ void ScreenPanelGL::drawScreenGL()
|
||||
if (emuThread->NDS->GPU.GetRenderer3D().Accelerated)
|
||||
{
|
||||
// hardware-accelerated render
|
||||
static_cast<GLRenderer&>(emuThread->NDS->GPU.GetRenderer3D()).GetCompositor().BindOutputTexture(frontbuf);
|
||||
emuThread->NDS->GPU.GetRenderer3D().BindOutputTexture(frontbuf);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -936,7 +930,7 @@ void ScreenPanelGL::drawScreenGL()
|
||||
|
||||
u32 y = kOSDMargin;
|
||||
|
||||
glUseProgram(osdShader[2]);
|
||||
glUseProgram(osdShader);
|
||||
|
||||
glUniform2f(osdScreenSizeULoc, w, h);
|
||||
glUniform1f(osdScaleFactorULoc, factor);
|
||||
|
@ -172,7 +172,7 @@ private:
|
||||
|
||||
GLuint screenVertexBuffer, screenVertexArray;
|
||||
GLuint screenTexture;
|
||||
GLuint screenShaderProgram[3];
|
||||
GLuint screenShaderProgram;
|
||||
GLuint screenShaderTransformULoc, screenShaderScreenSizeULoc;
|
||||
|
||||
QMutex screenSettingsLock;
|
||||
@ -181,7 +181,7 @@ private:
|
||||
|
||||
int lastScreenWidth = -1, lastScreenHeight = -1;
|
||||
|
||||
GLuint osdShader[3];
|
||||
GLuint osdShader;
|
||||
GLint osdScreenSizeULoc, osdPosULoc, osdSizeULoc;
|
||||
GLfloat osdScaleFactorULoc;
|
||||
GLuint osdVertexArray;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "types.h"
|
||||
#include "Platform.h"
|
||||
#include "Config.h"
|
||||
#include "GPU.h"
|
||||
|
||||
#include "VideoSettingsDialog.h"
|
||||
#include "ui_VideoSettingsDialog.h"
|
||||
@ -30,11 +31,20 @@
|
||||
|
||||
inline bool UsesGL()
|
||||
{
|
||||
return (Config::ScreenUseGL != 0) || (Config::_3DRenderer != 0);
|
||||
return (Config::ScreenUseGL != 0) || (Config::_3DRenderer != renderer3D_Software);
|
||||
}
|
||||
|
||||
VideoSettingsDialog* VideoSettingsDialog::currentDlg = nullptr;
|
||||
|
||||
void VideoSettingsDialog::setEnabled()
|
||||
{
|
||||
bool softwareRenderer = Config::_3DRenderer == renderer3D_Software;
|
||||
ui->cbGLDisplay->setEnabled(softwareRenderer);
|
||||
ui->cbSoftwareThreaded->setEnabled(softwareRenderer);
|
||||
ui->cbxGLResolution->setEnabled(!softwareRenderer);
|
||||
ui->cbBetterPolygons->setEnabled(Config::_3DRenderer == renderer3D_OpenGL);
|
||||
ui->cbxComputeHiResCoords->setEnabled(Config::_3DRenderer == renderer3D_OpenGLCompute);
|
||||
}
|
||||
|
||||
VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::VideoSettingsDialog)
|
||||
{
|
||||
@ -48,10 +58,12 @@ VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(
|
||||
oldSoftThreaded = Config::Threaded3D;
|
||||
oldGLScale = Config::GL_ScaleFactor;
|
||||
oldGLBetterPolygons = Config::GL_BetterPolygons;
|
||||
oldHiresCoordinates = Config::GL_HiresCoordinates;
|
||||
|
||||
grp3DRenderer = new QButtonGroup(this);
|
||||
grp3DRenderer->addButton(ui->rb3DSoftware, 0);
|
||||
grp3DRenderer->addButton(ui->rb3DOpenGL, 1);
|
||||
grp3DRenderer->addButton(ui->rb3DSoftware, renderer3D_Software);
|
||||
grp3DRenderer->addButton(ui->rb3DOpenGL, renderer3D_OpenGL);
|
||||
grp3DRenderer->addButton(ui->rb3DCompute, renderer3D_OpenGLCompute);
|
||||
#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
|
||||
connect(grp3DRenderer, SIGNAL(buttonClicked(int)), this, SLOT(onChange3DRenderer(int)));
|
||||
#else
|
||||
@ -75,25 +87,13 @@ VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(
|
||||
ui->cbxGLResolution->setCurrentIndex(Config::GL_ScaleFactor-1);
|
||||
|
||||
ui->cbBetterPolygons->setChecked(Config::GL_BetterPolygons != 0);
|
||||
ui->cbxComputeHiResCoords->setChecked(Config::GL_HiresCoordinates != 0);
|
||||
|
||||
if (!Config::ScreenVSync)
|
||||
ui->sbVSyncInterval->setEnabled(false);
|
||||
setVsyncControlEnable(UsesGL());
|
||||
|
||||
if (Config::_3DRenderer == 0)
|
||||
{
|
||||
ui->cbGLDisplay->setEnabled(true);
|
||||
ui->cbSoftwareThreaded->setEnabled(true);
|
||||
ui->cbxGLResolution->setEnabled(false);
|
||||
ui->cbBetterPolygons->setEnabled(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
ui->cbGLDisplay->setEnabled(false);
|
||||
ui->cbSoftwareThreaded->setEnabled(false);
|
||||
ui->cbxGLResolution->setEnabled(true);
|
||||
ui->cbBetterPolygons->setEnabled(true);
|
||||
}
|
||||
setEnabled();
|
||||
}
|
||||
|
||||
VideoSettingsDialog::~VideoSettingsDialog()
|
||||
@ -119,6 +119,7 @@ void VideoSettingsDialog::on_VideoSettingsDialog_rejected()
|
||||
Config::Threaded3D = oldSoftThreaded;
|
||||
Config::GL_ScaleFactor = oldGLScale;
|
||||
Config::GL_BetterPolygons = oldGLBetterPolygons;
|
||||
Config::GL_HiresCoordinates = oldHiresCoordinates;
|
||||
|
||||
emit updateVideoSettings(old_gl != UsesGL());
|
||||
|
||||
@ -133,31 +134,18 @@ void VideoSettingsDialog::setVsyncControlEnable(bool hasOGL)
|
||||
|
||||
void VideoSettingsDialog::onChange3DRenderer(int renderer)
|
||||
{
|
||||
bool old_gl = (Config::ScreenUseGL != 0) || (Config::_3DRenderer != 0);
|
||||
bool old_gl = UsesGL();
|
||||
|
||||
Config::_3DRenderer = renderer;
|
||||
|
||||
if (renderer == 0)
|
||||
{
|
||||
ui->cbGLDisplay->setEnabled(true);
|
||||
ui->cbSoftwareThreaded->setEnabled(true);
|
||||
ui->cbxGLResolution->setEnabled(false);
|
||||
ui->cbBetterPolygons->setEnabled(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
ui->cbGLDisplay->setEnabled(false);
|
||||
ui->cbSoftwareThreaded->setEnabled(false);
|
||||
ui->cbxGLResolution->setEnabled(true);
|
||||
ui->cbBetterPolygons->setEnabled(true);
|
||||
}
|
||||
setEnabled();
|
||||
|
||||
emit updateVideoSettings(old_gl != UsesGL());
|
||||
}
|
||||
|
||||
void VideoSettingsDialog::on_cbGLDisplay_stateChanged(int state)
|
||||
{
|
||||
bool old_gl = (Config::ScreenUseGL != 0) || (Config::_3DRenderer != 0);
|
||||
bool old_gl = UsesGL();
|
||||
|
||||
Config::ScreenUseGL = (state != 0);
|
||||
|
||||
@ -205,3 +193,10 @@ void VideoSettingsDialog::on_cbBetterPolygons_stateChanged(int state)
|
||||
|
||||
emit updateVideoSettings(false);
|
||||
}
|
||||
|
||||
void VideoSettingsDialog::on_cbxComputeHiResCoords_stateChanged(int state)
|
||||
{
|
||||
Config::GL_HiresCoordinates = (state != 0);
|
||||
|
||||
emit updateVideoSettings(false);
|
||||
}
|
||||
|
@ -65,10 +65,12 @@ private slots:
|
||||
|
||||
void on_cbxGLResolution_currentIndexChanged(int idx);
|
||||
void on_cbBetterPolygons_stateChanged(int state);
|
||||
void on_cbxComputeHiResCoords_stateChanged(int state);
|
||||
|
||||
void on_cbSoftwareThreaded_stateChanged(int state);
|
||||
private:
|
||||
void setVsyncControlEnable(bool hasOGL);
|
||||
void setEnabled();
|
||||
|
||||
Ui::VideoSettingsDialog* ui;
|
||||
|
||||
@ -81,6 +83,7 @@ private:
|
||||
int oldSoftThreaded;
|
||||
int oldGLScale;
|
||||
int oldGLBetterPolygons;
|
||||
int oldHiresCoordinates;
|
||||
};
|
||||
|
||||
#endif // VIDEOSETTINGSDIALOG_H
|
||||
|
@ -6,7 +6,7 @@
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>408</width>
|
||||
<width>427</width>
|
||||
<height>262</height>
|
||||
</rect>
|
||||
</property>
|
||||
@ -24,7 +24,7 @@
|
||||
<enum>QLayout::SetFixedSize</enum>
|
||||
</property>
|
||||
<property name="horizontalSpacing">
|
||||
<number>-1</number>
|
||||
<number>6</number>
|
||||
</property>
|
||||
<item row="1" column="1">
|
||||
<widget class="QGroupBox" name="groupBox_3">
|
||||
@ -39,13 +39,6 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QComboBox" name="cbxGLResolution">
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>The resolution at which the 3D graphics will be rendered. Higher resolutions improve graphics quality when the main window is enlarged, but may also cause glitches.</p></body></html></string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<widget class="QCheckBox" name="cbBetterPolygons">
|
||||
<property name="whatsThis">
|
||||
@ -56,6 +49,20 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QComboBox" name="cbxGLResolution">
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>The resolution at which the 3D graphics will be rendered. Higher resolutions improve graphics quality when the main window is enlarged, but may also cause glitches.</p></body></html></string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QCheckBox" name="cbxComputeHiResCoords">
|
||||
<property name="text">
|
||||
<string>Use high resolution coordinates</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
@ -94,23 +101,7 @@
|
||||
<string>Display settings</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout_2">
|
||||
<item row="6" column="0">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>The interval at which to synchronize to the monitor's refresh rate. Set to 1 for a 60Hz monitor, 2 for 120Hz, ...</p></body></html></string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>VSync interval:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="1">
|
||||
<item row="7" column="1">
|
||||
<widget class="QSpinBox" name="sbVSyncInterval">
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>The interval at which to synchronize to the monitor's refresh rate. Set to 1 for a 60Hz monitor, 2 for 120Hz, ...</p></body></html></string>
|
||||
@ -123,7 +114,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="0" colspan="2">
|
||||
<item row="5" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="cbGLDisplay">
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>Use OpenGL to draw the DS screens to the main window. May result in better frame pacing. Mandatory when using the OpenGL 3D renderer.</p></body></html></string>
|
||||
@ -133,17 +124,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="cbVSync">
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>When using OpenGL, synchronize the video output to your monitor's refresh rate.</p></body></html></string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>VSync</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0" colspan="2">
|
||||
<item row="4" column="0" colspan="2">
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
@ -159,13 +140,39 @@
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item row="6" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="cbVSync">
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>When using OpenGL, synchronize the video output to your monitor's refresh rate.</p></body></html></string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>VSync</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="7" column="0">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>The interval at which to synchronize to the monitor's refresh rate. Set to 1 for a 60Hz monitor, 2 for 120Hz, ...</p></body></html></string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>VSync interval:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0" colspan="2">
|
||||
<widget class="QRadioButton" name="rb3DOpenGL">
|
||||
<property name="whatsThis">
|
||||
<string><html><head/><body><p>The OpenGL renderer may be faster than software and supports graphical enhancements, but is more prone to glitches.</p></body></html></string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>OpenGL</string>
|
||||
<string>OpenGL (Classic)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -186,6 +193,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QRadioButton" name="rb3DCompute">
|
||||
<property name="text">
|
||||
<string>OpenGL (Compute shader)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -2048,6 +2048,7 @@ void MainWindow::onUpdateVideoSettings(bool glchange)
|
||||
connect(emuThread, SIGNAL(windowUpdate()), panel, SLOT(repaint()));
|
||||
}
|
||||
|
||||
printf("update video settings\n");
|
||||
videoSettingsDirty = true;
|
||||
|
||||
if (glchange)
|
||||
|
@ -175,10 +175,6 @@ bool camStarted[2];
|
||||
//extern int AspectRatiosNum;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static bool FileExtensionInList(const QString& filename, const QStringList& extensions, Qt::CaseSensitivity cs = Qt::CaseInsensitive)
|
||||
{
|
||||
return std::any_of(extensions.cbegin(), extensions.cend(), [&](const auto& ext) {
|
||||
@ -339,10 +335,10 @@ int main(int argc, char** argv)
|
||||
|
||||
if (!Config::Load()) QMessageBox::critical(NULL, "melonDS", "Unable to write to config.\nPlease check the write permissions of the folder you placed melonDS in.");
|
||||
|
||||
#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); }
|
||||
#define SANITIZE(var, min, max) { var = std::clamp<int>(var, min, max); }
|
||||
SANITIZE(Config::ConsoleType, 0, 1);
|
||||
#ifdef OGLRENDERER_ENABLED
|
||||
SANITIZE(Config::_3DRenderer, 0, 1); // 0 is the software renderer, 1 is the OpenGL renderer
|
||||
SANITIZE(Config::_3DRenderer, 0, renderer3D_Max);
|
||||
#else
|
||||
SANITIZE(Config::_3DRenderer, 0, 0);
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user