diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 93aac5ce..16a3d80a 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -59,6 +59,8 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c shaderSource += std::to_string(CoarseTileCountY) + ";"; shaderSource += "\n#define CoarseTileArea "; shaderSource += std::to_string(CoarseTileArea); + shaderSource += "\n#define ClearCoarseBinMaskLocalSize "; + shaderSource += std::to_string(ClearCoarseBinMaskLocalSize); shaderSource += ComputeRendererShaders::Common; shaderSource += source; @@ -332,6 +334,7 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate TileSize = std::min(8 * TileScale, 32); CoarseTileCountY = TileSize < 32 ? 4 : 6; + ClearCoarseBinMaskLocalSize = TileSize < 32 ? 64 : 48; CoarseTileArea = CoarseTileCountX * CoarseTileCountY; CoarseTileW = CoarseTileCountX * TileSize; CoarseTileH = CoarseTileCountY * TileSize; @@ -944,7 +947,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glBindBufferBase(GL_UNIFORM_BUFFER, 0, MetaUniformMemory); glUseProgram(ShaderClearCoarseBinMask); - glDispatchCompute(TilesPerLine*TileLines/32, 1, 1); + glDispatchCompute(TilesPerLine*TileLines/ClearCoarseBinMaskLocalSize, 1, 1); bool wbuffer = false; if (numYSpans > 0) diff --git a/src/GPU3D_Compute.h b/src/GPU3D_Compute.h index 6a5fd499..30766ec7 100644 --- a/src/GPU3D_Compute.h +++ b/src/GPU3D_Compute.h @@ -169,6 +169,7 @@ private: int CoarseTileArea; int CoarseTileW; int CoarseTileH; + int ClearCoarseBinMaskLocalSize; static constexpr int BinStride = 2048/32; static constexpr int CoarseBinStride = BinStride/32; diff --git a/src/GPU3D_Compute_shaders.h b/src/GPU3D_Compute_shaders.h index 9b319029..55634610 100644 --- a/src/GPU3D_Compute_shaders.h +++ b/src/GPU3D_Compute_shaders.h @@ -846,7 +846,7 @@ void main() const std::string ClearCoarseBinMask = BinningBuffer + R"( -layout (local_size_x = 32) in; +layout (local_size_x = ClearCoarseBinMaskLocalSize) in; void main() {