diff --git a/Source/Core/Core/Analytics.cpp b/Source/Core/Core/Analytics.cpp index 8638b79dba..a3060833c9 100644 --- a/Source/Core/Core/Analytics.cpp +++ b/Source/Core/Core/Analytics.cpp @@ -239,6 +239,8 @@ void DolphinAnalytics::MakePerGameBuilder() builder.AddData("gpu-has-early-z", g_Config.backend_info.bSupportsEarlyZ); builder.AddData("gpu-has-binding-layout", g_Config.backend_info.bSupportsBindingLayout); builder.AddData("gpu-has-bbox", g_Config.backend_info.bSupportsBBox); + builder.AddData("gpu-has-fragment-stores-and-atomics", + g_Config.backend_info.bSupportsFragmentStoresAndAtomics); builder.AddData("gpu-has-gs-instancing", g_Config.backend_info.bSupportsGSInstancing); builder.AddData("gpu-has-post-processing", g_Config.backend_info.bSupportsPostProcessing); builder.AddData("gpu-has-palette-conversion", g_Config.backend_info.bSupportsPaletteConversion); diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 2fc00dc037..e8a51d3441 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -112,7 +112,8 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsEarlyZ = shader_model_5_supported; // Requires full UAV functionality (only available in shader model 5) - g_Config.backend_info.bSupportsBBox = shader_model_5_supported; + g_Config.backend_info.bSupportsBBox = + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = shader_model_5_supported; // Requires the instance attribute (only available in shader model 5) g_Config.backend_info.bSupportsGSInstancing = shader_model_5_supported; diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp index bae916e0f8..7a48647c0e 100644 --- a/Source/Core/VideoBackends/D3D12/main.cpp +++ b/Source/Core/VideoBackends/D3D12/main.cpp @@ -120,7 +120,8 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsEarlyZ = true; // Requires full UAV functionality (only 
available in shader model 5) - g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsBBox = + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; // Requires the instance attribute (only available in shader model 5) g_Config.backend_info.bSupportsGSInstancing = true; diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index 0ee4bd2afe..ebf8433d92 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -2,22 +2,47 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include +#include #include #include "Common/GL/GLUtil.h" #include "VideoBackends/OGL/BoundingBox.h" +#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" static GLuint s_bbox_buffer_id; +static GLuint s_pbo; + +static std::array s_stencil_bounds; +static bool s_stencil_updated; +static bool s_stencil_cleared; + +static int s_target_width; +static int s_target_height; namespace OGL { -void BoundingBox::Init() +void BoundingBox::SetTargetSizeChanged(int target_width, int target_height) { - if (g_ActiveConfig.backend_info.bSupportsBBox) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) + return; + + s_target_width = target_width; + s_target_height = target_height; + s_stencil_updated = false; + + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); + glBufferData(GL_PIXEL_PACK_BUFFER, s_target_width * s_target_height, nullptr, GL_STREAM_READ); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); +} + +void BoundingBox::Init(int target_width, int target_height) +{ + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) { int initial_values[4] = {0, 0, 0, 0}; glGenBuffers(1, &s_bbox_buffer_id); @@ -25,46 +50,126 @@ void BoundingBox::Init() glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 
s_bbox_buffer_id); } + else + { + s_stencil_bounds = {{0, 0, 0, 0}}; + glGenBuffers(1, &s_pbo); + SetTargetSizeChanged(target_width, target_height); + } } void BoundingBox::Shutdown() { - if (g_ActiveConfig.backend_info.bSupportsBBox) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) + { glDeleteBuffers(1, &s_bbox_buffer_id); + } + else + { + glDeleteBuffers(1, &s_pbo); + } } void BoundingBox::Set(int index, int value) { - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } + else + { + s_stencil_bounds[index] = value; + + if (!s_stencil_cleared) + { + // Assumes that the EFB framebuffer is currently bound + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + s_stencil_updated = false; + s_stencil_cleared = true; + } + } } int BoundingBox::Get(int index) { - int data = 0; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - - if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA)) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) { - // Using glMapBufferRange to read back the contents of the SSBO is extremely slow - // on nVidia drivers. This is more noticeable at higher internal resolutions. - // Using glGetBufferSubData instead does not seem to exhibit this slowdown. - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); + int data = 0; + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA)) + { + // Using glMapBufferRange to read back the contents of the SSBO is extremely slow + // on nVidia drivers. 
This is more noticeable at higher internal resolutions. + // Using glGetBufferSubData instead does not seem to exhibit this slowdown. + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); + } + else + { + // Using glMapBufferRange is faster on AMD cards by a measurable margin. + void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), + GL_MAP_READ_BIT); + if (ptr) + { + memcpy(&data, ptr, sizeof(int)); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } + } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + return data; } else { - // Using glMapBufferRange is faster on AMD cards by a measurable margin. - void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), - GL_MAP_READ_BIT); - if (ptr) + if (s_stencil_updated) { - memcpy(&data, ptr, sizeof(int)); - glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); - } - } + s_stencil_updated = false; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - return data; + FramebufferManager::ResolveEFBStencilTexture(); + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glReadPixels(0, 0, s_target_width, s_target_height, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer()); + + // Eke every bit of performance out of the compiler that we can + std::array bounds = s_stencil_bounds; + + u8* data = static_cast(glMapBufferRange( + GL_PIXEL_PACK_BUFFER, 0, s_target_height * s_target_width, GL_MAP_READ_BIT)); + + for (int row = 0; row < s_target_height; row++) + { + for (int col = 0; col < s_target_width; col++) + { + if (data[row * s_target_width + col] == 0) + continue; + bounds[0] = std::min(bounds[0], col); + bounds[1] = std::max(bounds[1], col); + bounds[2] = std::min(bounds[2], row); + bounds[3] = std::max(bounds[3], row); + } + } + + s_stencil_bounds = bounds; + + 
glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + } + + return s_stencil_bounds[index]; + } +} + +void BoundingBox::StencilWasUpdated() +{ + s_stencil_updated = true; + s_stencil_cleared = false; +} + +bool BoundingBox::NeedsStencilBuffer() +{ + return g_ActiveConfig.bBBoxEnable && !g_ActiveConfig.BBoxUseFragmentShaderImplementation(); } }; diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.h b/Source/Core/VideoBackends/OGL/BoundingBox.h index 0aedff54df..44365c9fbc 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.h +++ b/Source/Core/VideoBackends/OGL/BoundingBox.h @@ -9,9 +9,18 @@ namespace OGL class BoundingBox { public: - static void Init(); + static void Init(int target_width, int target_height); static void Shutdown(); + static void SetTargetSizeChanged(int target_width, int target_height); + + // When SSBO isn't available, the bounding box is calculated directly from the + // stencil buffer. + static bool NeedsStencilBuffer(); + // When the stencil buffer is changed, this function needs to be called to + // invalidate the cached bounding box data. 
+ static void StencilWasUpdated(); + static void Set(int index, int value); static int Get(int index); }; diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 45a4b196a0..17244db24e 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -12,6 +12,7 @@ #include "Common/CommonTypes.h" #include "Common/GL/GLInterfaceBase.h" #include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" #include "Core/HW/Memmap.h" @@ -28,6 +29,7 @@ namespace OGL int FramebufferManager::m_targetWidth; int FramebufferManager::m_targetHeight; int FramebufferManager::m_msaaSamples; +bool FramebufferManager::m_enable_stencil_buffer; GLenum FramebufferManager::m_textureType; std::vector FramebufferManager::m_efbFramebuffer; @@ -49,7 +51,64 @@ GLuint FramebufferManager::m_EfbPokes_VBO; GLuint FramebufferManager::m_EfbPokes_VAO; SHADER FramebufferManager::m_EfbPokes; -FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples) +GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_format, + GLenum pixel_format, GLenum data_type) +{ + GLuint texture; + glGenTextures(1, &texture); + glBindTexture(texture_type, texture); + if (texture_type == GL_TEXTURE_2D_ARRAY) + { + glTexParameteri(texture_type, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage3D(texture_type, 0, internal_format, m_targetWidth, m_targetHeight, m_EFBLayers, 0, + pixel_format, data_type, nullptr); + } + else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) + { + if (g_ogl_config.bSupports3DTextureStorage) + glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, m_EFBLayers, false); + else + glTexImage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, m_EFBLayers, false); + } + else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE) + { + if 
(g_ogl_config.bSupports2DTextureStorage) + glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, false); + else + glTexImage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, false); + } + else + { + PanicAlert("Unhandled texture type %d", texture_type); + } + glBindTexture(texture_type, 0); + return texture; +} + +void FramebufferManager::BindLayeredTexture(GLuint texture, const std::vector& framebuffers, + GLenum attachment, GLenum texture_type) +{ + glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[0]); + FramebufferTexture(GL_FRAMEBUFFER, attachment, texture_type, texture, 0); + // Bind the remaining layers of `framebuffers` as separate FBOs for blitting. + for (unsigned int i = 1; i < m_EFBLayers; i++) + { + glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[i]); + glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, texture, 0, i); + } +} + +bool FramebufferManager::HasStencilBuffer() +{ + return m_enable_stencil_buffer; +} + +FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, + bool enable_stencil_buffer) { m_xfbFramebuffer = 0; m_efbColor = 0; @@ -60,8 +119,8 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_targetWidth = targetWidth; m_targetHeight = targetHeight; - m_msaaSamples = msaaSamples; + m_enable_stencil_buffer = enable_stencil_buffer; // The EFB can be set to different pixel formats by the game through the // BPMEM_ZCOMPARE register (which should probably have a different name). @@ -76,166 +135,69 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glActiveTexture(GL_TEXTURE9); - GLuint glObj[3]; - glGenTextures(3, glObj); - m_efbColor = glObj[0]; - m_efbDepth = glObj[1]; - m_efbColorSwap = glObj[2]; - m_EFBLayers = (g_ActiveConfig.iStereoMode > 0) ? 
2 : 1; m_efbFramebuffer.resize(m_EFBLayers); m_resolvedFramebuffer.resize(m_EFBLayers); - // OpenGL MSAA textures are a different kind of texture type and must be allocated - // with a different function, so we create them separately. + GLenum depth_internal_format = GL_DEPTH_COMPONENT32F; + GLenum depth_pixel_format = GL_DEPTH_COMPONENT; + GLenum depth_data_type = GL_FLOAT; + if (m_enable_stencil_buffer) + { + depth_internal_format = GL_DEPTH32F_STENCIL8; + depth_pixel_format = GL_DEPTH_STENCIL; + depth_data_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + } + if (m_msaaSamples <= 1) { m_textureType = GL_TEXTURE_2D_ARRAY; - - glBindTexture(m_textureType, m_efbColor); - glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(m_textureType, 0, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); - - glBindTexture(m_textureType, m_efbDepth); - glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(m_textureType, 0, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, - m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(m_textureType, 0, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); } else { - GLenum resolvedType = GL_TEXTURE_2D_ARRAY; - // Only use a layered multisample texture if needed. Some drivers // slow down significantly with single-layered multisample textures. 
if (m_EFBLayers > 1) - { m_textureType = GL_TEXTURE_2D_MULTISAMPLE_ARRAY; - - if (g_ogl_config.bSupports3DTextureStorage) - { - glBindTexture(m_textureType, m_efbColor); - glTexStorage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexStorage3DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, - m_targetWidth, m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexStorage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - glBindTexture(m_textureType, 0); - } - else - { - glBindTexture(m_textureType, m_efbColor); - glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - glBindTexture(m_textureType, 0); - } - } else - { m_textureType = GL_TEXTURE_2D_MULTISAMPLE; - if (g_ogl_config.bSupports2DTextureStorage) - { - glBindTexture(m_textureType, m_efbColor); - glTexStorage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexStorage2DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, - m_targetWidth, m_targetHeight, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexStorage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, false); - glBindTexture(m_textureType, 0); - } - else - { - glBindTexture(m_textureType, m_efbColor); - glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA, 
m_targetWidth, - m_targetHeight, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, m_targetWidth, - m_targetHeight, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, - m_targetHeight, false); - glBindTexture(m_textureType, 0); - } - } - // Although we are able to access the multisampled texture directly, we don't do it everywhere. // The old way is to "resolve" this multisampled texture by copying it into a non-sampled // texture. // This would lead to an unneeded copy of the EFB, so we are going to avoid it. // But as this job isn't done right now, we do need that texture for resolving: - glGenTextures(2, glObj); - m_resolvedColorTexture = glObj[0]; - m_resolvedDepthTexture = glObj[1]; + GLenum resolvedType = GL_TEXTURE_2D_ARRAY; - glBindTexture(resolvedType, m_resolvedColorTexture); - glTexParameteri(resolvedType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(resolvedType, 0, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); - - glBindTexture(resolvedType, m_resolvedDepthTexture); - glTexParameteri(resolvedType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(resolvedType, 0, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, m_EFBLayers, - 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); + m_resolvedColorTexture = CreateTexture(resolvedType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); + m_resolvedDepthTexture = + CreateTexture(resolvedType, depth_internal_format, depth_pixel_format, depth_data_type); // Bind resolved textures to resolved framebuffer. 
glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); - glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer[0]); - FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, resolvedType, m_resolvedColorTexture, - 0); - FramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, resolvedType, m_resolvedDepthTexture, - 0); - - // Bind all the other layers as separate FBOs for blitting. - for (unsigned int i = 1; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_resolvedColorTexture, 0, i); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_resolvedDepthTexture, 0, i); - } + BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, + resolvedType); + BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, + resolvedType); + if (m_enable_stencil_buffer) + BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_STENCIL_ATTACHMENT, + resolvedType); } + m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); + m_efbDepth = + CreateTexture(m_textureType, depth_internal_format, depth_pixel_format, depth_data_type); + m_efbColorSwap = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); + // Create XFB framebuffer; targets will be created elsewhere. glGenFramebuffers(1, &m_xfbFramebuffer); // Bind target textures to EFB framebuffer. glGenFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); - FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textureType, m_efbColor, 0); - FramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_textureType, m_efbDepth, 0); - - // Bind all the other layers as separate FBOs for blitting. 
- for (unsigned int i = 1; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[i]); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_efbColor, 0, i); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_efbDepth, 0, i); - } + BindLayeredTexture(m_efbColor, m_efbFramebuffer, GL_COLOR_ATTACHMENT0, m_textureType); + BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_DEPTH_ATTACHMENT, m_textureType); + if (m_enable_stencil_buffer) + BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_STENCIL_ATTACHMENT, m_textureType); // EFB framebuffer is currently bound, make sure to clear it before use. glViewport(0, 0, m_targetWidth, m_targetHeight); @@ -243,6 +205,11 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glClearColor(0.f, 0.f, 0.f, 0.f); glClearDepthf(1.0f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + if (m_enable_stencil_buffer) + { + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + } // reinterpret pixel format const char* vs = m_EFBLayers > 1 ? "void main(void) {\n" @@ -542,6 +509,24 @@ GLuint FramebufferManager::GetEFBDepthTexture(const EFBRectangle& sourceRc) } } +void FramebufferManager::ResolveEFBStencilTexture() +{ + if (m_msaaSamples <= 1) + return; + + // Resolve. + for (unsigned int i = 0; i < m_EFBLayers; i++) + { + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); + glBlitFramebuffer(0, 0, m_targetWidth, m_targetHeight, 0, 0, m_targetWidth, m_targetHeight, + GL_STENCIL_BUFFER_BIT, GL_NEAREST); + } + + // Return to EFB. 
+ glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); +} + void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) { @@ -557,6 +542,13 @@ void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, sourceRc.GetWidth(), fbStride, fbHeight); } +GLuint FramebufferManager::GetResolvedFramebuffer() +{ + if (m_msaaSamples <= 1) + return m_efbFramebuffer[0]; + return m_resolvedFramebuffer[0]; +} + void FramebufferManager::SetFramebuffer(GLuint fb) { glBindFramebuffer(GL_FRAMEBUFFER, fb != 0 ? fb : GetEFBFramebuffer()); diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h index 84270777c4..62b2f5ce3c 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.h @@ -63,13 +63,15 @@ struct XFBSource : public XFBSourceBase class FramebufferManager : public FramebufferManagerBase { public: - FramebufferManager(int targetWidth, int targetHeight, int msaaSamples); + FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, + bool enable_stencil_buffer); ~FramebufferManager(); // To get the EFB in texture form, these functions may have to transfer // the EFB to a resolved texture first. static GLuint GetEFBColorTexture(const EFBRectangle& sourceRc); static GLuint GetEFBDepthTexture(const EFBRectangle& sourceRc); + static void ResolveEFBStencilTexture(); static GLuint GetEFBFramebuffer(unsigned int layer = 0) { @@ -77,7 +79,7 @@ public: } static GLuint GetXFBFramebuffer() { return m_xfbFramebuffer; } // Resolved framebuffer is only used in MSAA mode. 
- static GLuint GetResolvedFramebuffer() { return m_resolvedFramebuffer[0]; } + static GLuint GetResolvedFramebuffer(); static void SetFramebuffer(GLuint fb); static void FramebufferTexture(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); @@ -100,8 +102,13 @@ public: static void ReinterpretPixelData(unsigned int convtype); static void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); + static bool HasStencilBuffer(); private: + GLuint CreateTexture(GLenum texture_type, GLenum internal_format, GLenum pixel_format, + GLenum data_type); + void BindLayeredTexture(GLuint texture, const std::vector& framebuffers, + GLenum attachment, GLenum texture_type); std::unique_ptr CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; @@ -122,6 +129,8 @@ private: static GLuint m_efbColorSwap; // will be hot swapped with m_efbColor when reinterpreting EFB pixel formats + static bool m_enable_stencil_buffer; + // Only used in MSAA mode, TODO: try to avoid them static std::vector m_resolvedFramebuffer; static GLuint m_resolvedColorTexture; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index ef91b01f02..508d6e41ed 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -618,7 +618,7 @@ void ProgramShaderCache::CreateHeader() "#define SAMPLER_BINDING(x)\n", // Input/output blocks are matched by name during program linking "#define VARYING_LOCATION(x)\n", - !is_glsles && g_ActiveConfig.backend_info.bSupportsBBox ? + !is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ? "#extension GL_ARB_shader_storage_buffer_object : enable" : "", v < GLSL_400 && g_ActiveConfig.backend_info.bSupportsGSInstancing ? 
diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index f898d73722..69554269cb 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -411,7 +411,8 @@ Renderer::Renderer() g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART) && ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); - g_Config.backend_info.bSupportsBBox = + g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = GLExtensions::Supports("GL_ARB_gpu_shader5") && @@ -497,7 +498,7 @@ Renderer::Renderer() g_Config.backend_info.bSupportsGSInstancing = g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP; - g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsMSAA = true; g_ogl_config.bSupports2DTextureStorage = true; if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 && @@ -518,7 +519,7 @@ Renderer::Renderer() g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsSSAA = true; - g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsCopySubImage = true; g_ogl_config.bSupportsGLBaseVertex = true; g_ogl_config.bSupportsDebug = true; @@ -655,10 +656,13 @@ Renderer::Renderer() // options while running g_Config.bRunning = true; - glStencilFunc(GL_ALWAYS, 0, 0); - glBlendFunc(GL_ONE, GL_ONE); + // The 
stencil is used for bounding box emulation when SSBOs are not available + glDisable(GL_STENCIL_TEST); + glStencilFunc(GL_ALWAYS, 1, 0xFF); + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); // Reset The Current Viewport + // Reset The Current Viewport + glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); if (g_ActiveConfig.backend_info.bSupportsClipControl) glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); @@ -675,10 +679,9 @@ Renderer::Renderer() glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment - glDisable(GL_STENCIL_TEST); glEnable(GL_SCISSOR_TEST); - glScissor(0, 0, GetTargetWidth(), GetTargetHeight()); + glBlendFunc(GL_ONE, GL_ONE); glBlendColor(0, 0, 0, 0.5f); glClearDepthf(1.0f); @@ -731,8 +734,8 @@ void Renderer::Shutdown() void Renderer::Init() { // Initialize the FramebufferManager - g_framebuffer_manager = - std::make_unique(m_target_width, m_target_height, s_MSAASamples); + g_framebuffer_manager = std::make_unique( + m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); m_post_processor = std::make_unique(); s_raster_font = std::make_unique(); @@ -1335,34 +1338,38 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, } bool target_size_changed = CalculateTargetSize(); - if (target_size_changed || xfbchanged || window_resized || - (s_last_multisamples != g_ActiveConfig.iMultisamples) || - (s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0))) + bool stencil_buffer_enabled = + static_cast(g_framebuffer_manager.get())->HasStencilBuffer(); + + bool fb_needs_update = target_size_changed || + s_last_multisamples != g_ActiveConfig.iMultisamples || + stencil_buffer_enabled != BoundingBox::NeedsStencilBuffer() || + s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0); + + if (xfbchanged || window_resized || fb_needs_update) { s_last_xfb_mode = g_ActiveConfig.bUseRealXFB; - UpdateDrawRectangle(); + } + if (fb_needs_update) + { + 
s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0; + s_last_multisamples = g_ActiveConfig.iMultisamples; + s_MSAASamples = s_last_multisamples; - if (target_size_changed || s_last_multisamples != g_ActiveConfig.iMultisamples || - s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0)) + if (s_MSAASamples > 1 && s_MSAASamples > g_ogl_config.max_samples) { - s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0; - s_last_multisamples = g_ActiveConfig.iMultisamples; - s_MSAASamples = s_last_multisamples; - - if (s_MSAASamples > 1 && s_MSAASamples > g_ogl_config.max_samples) - { - s_MSAASamples = g_ogl_config.max_samples; - OSD::AddMessage(StringFromFormat( - "%d Anti Aliasing samples selected, but only %d supported by your GPU.", - s_last_multisamples, g_ogl_config.max_samples), - 10000); - } - - g_framebuffer_manager.reset(); - g_framebuffer_manager = - std::make_unique(m_target_width, m_target_height, s_MSAASamples); + s_MSAASamples = g_ogl_config.max_samples; + OSD::AddMessage( + StringFromFormat("%d Anti Aliasing samples selected, but only %d supported by your GPU.", + s_last_multisamples, g_ogl_config.max_samples), + 10000); } + + g_framebuffer_manager.reset(); + g_framebuffer_manager = std::make_unique( + m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); + BoundingBox::SetTargetSizeChanged(m_target_width, m_target_height); } // --------------------------------------------------------------------- diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index bd9878b692..58d7b19ad9 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -14,9 +14,11 @@ #include "Common/GL/GLExtensions/GLExtensions.h" #include "Common/StringUtil.h" +#include "VideoBackends/OGL/BoundingBox.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" +#include 
"VideoCommon/BoundingBox.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/Statistics.h" @@ -156,8 +158,19 @@ void VertexManager::vFlush() // setup the pointers nativeVertexFmt->SetupVertexPointers(); + if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) + { + glEnable(GL_STENCIL_TEST); + } + Draw(stride); + if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) + { + OGL::BoundingBox::StencilWasUpdated(); + glDisable(GL_STENCIL_TEST); + } + #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { @@ -177,7 +190,6 @@ void VertexManager::vFlush() } #endif g_Config.iSaveTargetId++; - ClearEFBCache(); } diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 3be16a6cbf..c1cf73f413 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -212,7 +212,7 @@ void VideoBackend::Video_Prepare() g_sampler_cache = std::make_unique(); static_cast(g_renderer.get())->Init(); TextureConverter::Init(); - BoundingBox::Init(); + BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); } void VideoBackend::Shutdown() diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 1487e09afc..74e7786130 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -240,6 +240,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsGeometryShaders = false; // Dependent on features. config->backend_info.bSupportsGSInstancing = false; // Dependent on features. config->backend_info.bSupportsBBox = false; // Dependent on features. + config->backend_info.bSupportsFragmentStoresAndAtomics = false; // Dependent on features. config->backend_info.bSupportsSSAA = false; // Dependent on features. 
config->backend_info.bSupportsDepthClamp = false; // Dependent on features. config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs. @@ -264,7 +265,8 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD config->backend_info.bSupportsDualSourceBlend = (features.dualSrcBlend == VK_TRUE); config->backend_info.bSupportsGeometryShaders = (features.geometryShader == VK_TRUE); config->backend_info.bSupportsGSInstancing = (features.geometryShader == VK_TRUE); - config->backend_info.bSupportsBBox = (features.fragmentStoresAndAtomics == VK_TRUE); + config->backend_info.bSupportsBBox = config->backend_info.bSupportsFragmentStoresAndAtomics = + (features.fragmentStoresAndAtomics == VK_TRUE); config->backend_info.bSupportsSSAA = (features.sampleRateShading == VK_TRUE); // Disable geometry shader when shaderTessellationAndGeometryPointSize is not supported. diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 608248fab8..9eb0d625cb 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -171,7 +171,7 @@ PixelShaderUid GetPixelShaderUid() uid_data->genMode_numtevstages = bpmem.genMode.numtevstages; uid_data->genMode_numtexgens = bpmem.genMode.numtexgens; uid_data->per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; - uid_data->bounding_box = g_ActiveConfig.backend_info.bSupportsBBox && + uid_data->bounding_box = g_ActiveConfig.BBoxUseFragmentShaderImplementation() && g_ActiveConfig.bBBoxEnable && BoundingBox::active; uid_data->rgba6_format = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 2db2494a05..7579c90c6e 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -119,6 +119,7 @@ void VideoConfig::Load(const 
std::string& ini_file) IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks"); hacks->Get("EFBAccessEnable", &bEFBAccessEnable, true); hacks->Get("BBoxEnable", &bBBoxEnable, false); + hacks->Get("BBoxPreferStencilImplementation", &bBBoxPreferStencilImplementation, false); hacks->Get("ForceProgressive", &bForceProgressive, true); hacks->Get("EFBToTextureEnable", &bSkipEFBCopyToRam, true); hacks->Get("EFBScaledCopy", &bCopyEFBScaled, true); @@ -342,6 +343,7 @@ void VideoConfig::Save(const std::string& ini_file) IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks"); hacks->Set("EFBAccessEnable", bEFBAccessEnable); hacks->Set("BBoxEnable", bBBoxEnable); + hacks->Set("BBoxPreferStencilImplementation", bBBoxPreferStencilImplementation); hacks->Set("ForceProgressive", bForceProgressive); hacks->Set("EFBToTextureEnable", bSkipEFBCopyToRam); hacks->Set("EFBScaledCopy", bCopyEFBScaled); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index ec79e6de48..a3a44f2ec7 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -114,6 +114,7 @@ struct VideoConfig final bool bEFBAccessEnable; bool bPerfQueriesEnable; bool bBBoxEnable; + bool bBBoxPreferStencilImplementation; // OpenGL-only, to see how slow it is compared to SSBOs bool bForceProgressive; bool bEFBEmulateFormatChanges; @@ -189,6 +190,7 @@ struct VideoConfig final bool bSupportsPaletteConversion; bool bSupportsClipControl; // Needed by VertexShaderGen, so must stay in VideoCommon bool bSupportsSSAA; + bool bSupportsFragmentStoresAndAtomics; // a.k.a. OpenGL SSBOs a.k.a. 
Direct3D UAVs bool bSupportsDepthClamp; // Needed by VertexShaderGen, so must stay in VideoCommon bool bSupportsReversedDepthRange; bool bSupportsMultithreading; @@ -202,6 +204,12 @@ struct VideoConfig final { return backend_info.bSupportsExclusiveFullscreen && !bBorderlessFullscreen; } + bool BBoxUseFragmentShaderImplementation() const + { + if (backend_info.api_type == APIType::OpenGL && bBBoxPreferStencilImplementation) + return false; + return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics; + } }; extern VideoConfig g_Config;