diff --git a/Externals/Vulkan-Headers b/Externals/Vulkan-Headers index 05fe2cc910..29f979ee5a 160000 --- a/Externals/Vulkan-Headers +++ b/Externals/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 05fe2cc910a68c9ba5dac07db46ef78573acee72 +Subproject commit 29f979ee5aa58b7b005f805ea8df7a855c39ff37 diff --git a/Source/Core/VideoBackends/D3D/D3DMain.cpp b/Source/Core/VideoBackends/D3D/D3DMain.cpp index 9a25c1abde..286ec5928e 100644 --- a/Source/Core/VideoBackends/D3D/D3DMain.cpp +++ b/Source/Core/VideoBackends/D3D/D3DMain.cpp @@ -116,6 +116,8 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsPartialMultisampleResolve = true; g_Config.backend_info.bSupportsDynamicVertexLoader = false; g_Config.backend_info.bSupportsHDROutput = true; + g_Config.backend_info.bSupportsUnrestrictedDepthRange = false; + g_Config.backend_info.bSupportsDepthClampControl = false; g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames(); g_Config.backend_info.AAModes = D3D::GetAAModes(g_Config.iAdapter); diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp index 171455cf34..a5ec46efdd 100644 --- a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp @@ -91,6 +91,8 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsDynamicVertexLoader = true; g_Config.backend_info.bSupportsVSLinePointExpand = true; g_Config.backend_info.bSupportsHDROutput = true; + g_Config.backend_info.bSupportsUnrestrictedDepthRange = false; + g_Config.backend_info.bSupportsDepthClampControl = false; // We can only check texture support once we have a device. 
if (g_dx_context) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index de073280d7..ad934490b7 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -79,6 +79,8 @@ void Metal::Util::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsVSLinePointExpand = true; config->backend_info.bSupportsHDROutput = 1.0 < [[NSScreen deepestScreen] maximumPotentialExtendedDynamicRangeColorComponentValue]; + config->backend_info.bSupportsUnrestrictedDepthRange = false; + config->backend_info.bSupportsDepthClampControl = false; } void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 28afcce4dc..f48ad02f56 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -63,6 +63,8 @@ void VideoBackend::InitBackendInfo(const WindowSystemInfo& wsi) g_Config.backend_info.bSupportsSettingObjectNames = false; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; g_Config.backend_info.bSupportsDynamicVertexLoader = false; + g_Config.backend_info.bSupportsUnrestrictedDepthRange = false; + g_Config.backend_info.bSupportsDepthClampControl = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLConfig.cpp b/Source/Core/VideoBackends/OGL/OGLConfig.cpp index 4b1e5308a2..05bf12a3aa 100644 --- a/Source/Core/VideoBackends/OGL/OGLConfig.cpp +++ b/Source/Core/VideoBackends/OGL/OGLConfig.cpp @@ -350,6 +350,8 @@ bool PopulateConfig(GLContext* m_main_gl_context) GLExtensions::Supports("GL_ARB_derivative_control") || GLExtensions::Version() >= 450; g_Config.backend_info.bSupportsTextureQueryLevels = GLExtensions::Supports("GL_ARB_texture_query_levels") || GLExtensions::Version() >= 430; + 
g_Config.backend_info.bSupportsUnrestrictedDepthRange = + GLExtensions::Supports("GL_NV_depth_buffer_float"); if (GLExtensions::Supports("GL_ARB_shader_storage_buffer_object")) { diff --git a/Source/Core/VideoBackends/OGL/OGLGfx.cpp b/Source/Core/VideoBackends/OGL/OGLGfx.cpp index fa20a76d11..b63d6db014 100644 --- a/Source/Core/VideoBackends/OGL/OGLGfx.cpp +++ b/Source/Core/VideoBackends/OGL/OGLGfx.cpp @@ -118,6 +118,16 @@ static void APIENTRY ClearDepthf(GLfloat depthval) glClearDepth(depthval); } +// Two small overrides to support unrestricted depth range +static void APIENTRY DepthRangefNV(GLfloat neardepth, GLfloat fardepth) +{ + glDepthRangedNV(neardepth, fardepth); +} +static void APIENTRY ClearDepthfNV(GLfloat depthval) +{ + glClearDepthdNV(depthval); +} + OGLGfx::OGLGfx(std::unique_ptr main_gl_context, float backbuffer_scale) : m_main_gl_context(std::move(main_gl_context)), m_current_rasterization_state(RenderState::GetInvalidRasterizationState()), @@ -137,11 +147,16 @@ OGLGfx::OGLGfx(std::unique_ptr main_gl_context, float backbuffer_scal if (!m_main_gl_context->IsGLES()) { - // OpenGL 3 doesn't provide GLES like float functions for depth. - // They are in core in OpenGL 4.1, so almost every driver should support them. - // But for the oldest ones, we provide fallbacks to the old double functions. - if (!GLExtensions::Supports("GL_ARB_ES2_compatibility")) + if (g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) { + glDepthRangef = DepthRangefNV; + glClearDepthf = ClearDepthfNV; + } + else if (!GLExtensions::Supports("GL_ARB_ES2_compatibility")) + { + // OpenGL 3 doesn't provide GLES like float functions for depth. + // They are in core in OpenGL 4.1, so almost every driver should support them. + // But for the oldest ones, we provide fallbacks to the old double functions. 
glDepthRangef = DepthRangef; glClearDepthf = ClearDepthf; } @@ -387,7 +402,10 @@ void OGLGfx::ClearRegion(const MathUtil::Rectangle& target_rc, bool colorEn if (zEnable) { glDepthMask(zEnable ? GL_TRUE : GL_FALSE); - glClearDepthf(float(z & 0xFFFFFF) / 16777216.0f); + if (g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + glClearDepthf(float(z & 0xFFFFFF)); + else + glClearDepthf(float(z & 0xFFFFFF) / 16777216.0f); clear_mask |= GL_DEPTH_BUFFER_BIT; } diff --git a/Source/Core/VideoBackends/OGL/OGLMain.cpp b/Source/Core/VideoBackends/OGL/OGLMain.cpp index 13f47a054f..0ad1a285f4 100644 --- a/Source/Core/VideoBackends/OGL/OGLMain.cpp +++ b/Source/Core/VideoBackends/OGL/OGLMain.cpp @@ -135,6 +135,8 @@ bool VideoBackend::FillBackendInfo(GLContext* context) g_Config.backend_info.bSupportsPartialMultisampleResolve = true; // Unneccessary since OGL doesn't use pipelines g_Config.backend_info.bSupportsDynamicVertexLoader = false; + g_Config.backend_info.bSupportsUnrestrictedDepthRange = false; + g_Config.backend_info.bSupportsDepthClampControl = false; // TODO: There is a bug here, if texel buffers or SSBOs/atomics are not supported the graphics // options will show the option when it is not supported. 
The only way around this would be diff --git a/Source/Core/VideoBackends/OGL/OGLTexture.cpp b/Source/Core/VideoBackends/OGL/OGLTexture.cpp index 58f1b10a04..9529ad74a7 100644 --- a/Source/Core/VideoBackends/OGL/OGLTexture.cpp +++ b/Source/Core/VideoBackends/OGL/OGLTexture.cpp @@ -44,9 +44,15 @@ GLenum OGLTexture::GetGLInternalFormatForTextureFormat(AbstractTextureFormat for case AbstractTextureFormat::D24_S8: return GL_DEPTH24_STENCIL8; case AbstractTextureFormat::D32F: - return GL_DEPTH_COMPONENT32F; + if (g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + return GL_DEPTH_COMPONENT32F_NV; + else + return GL_DEPTH_COMPONENT32F; case AbstractTextureFormat::D32F_S8: - return GL_DEPTH32F_STENCIL8; + if (g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + return GL_DEPTH32F_STENCIL8_NV; + else + return GL_DEPTH32F_STENCIL8; default: PanicAlertFmt("Unhandled texture format."); return storage ? GL_RGBA8 : GL_RGBA; diff --git a/Source/Core/VideoBackends/Vulkan/VKGfx.cpp b/Source/Core/VideoBackends/Vulkan/VKGfx.cpp index d65a5d4680..d96e68ca33 100644 --- a/Source/Core/VideoBackends/Vulkan/VKGfx.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKGfx.cpp @@ -117,7 +117,9 @@ void VKGfx::ClearRegion(const MathUtil::Rectangle& target_rc, bool color_en clear_color_value.color.float32[1] = static_cast((color >> 8) & 0xFF) / 255.0f; clear_color_value.color.float32[2] = static_cast((color >> 0) & 0xFF) / 255.0f; clear_color_value.color.float32[3] = static_cast((color >> 24) & 0xFF) / 255.0f; - clear_depth_value.depthStencil.depth = static_cast(z & 0xFFFFFF) / 16777216.0f; + clear_depth_value.depthStencil.depth = static_cast(z & 0xFFFFFF); + if (!g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + clear_depth_value.depthStencil.depth /= 16777216.0f; if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) clear_depth_value.depthStencil.depth = 1.0f - clear_depth_value.depthStencil.depth; diff --git a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp 
b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp index 83cd9367b5..7930c9979c 100644 --- a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp @@ -368,12 +368,19 @@ std::unique_ptr VKPipeline::Create(const AbstractPipelineConfig& con GetVulkanColorBlendState(config.blending_state, blend_attachment_states.data(), static_cast(blend_attachment_states.size())); + static const VkDepthClampRangeEXT clamp_range = {0.0f, 16777215.0f}; + static const VkPipelineViewportDepthClampControlCreateInfoEXT depth_clamp_state = { + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLAMP_CONTROL_CREATE_INFO_EXT, nullptr, + VK_DEPTH_CLAMP_MODE_USER_DEFINED_RANGE_EXT, // VkDepthClampModeEXT depthClampMode + &clamp_range // const VkDepthClampRangeEXT* pDepthClampRange + }; + // This viewport isn't used, but needs to be specified anyway. static const VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; static const VkRect2D scissor = {{0, 0}, {1, 1}}; static const VkPipelineViewportStateCreateInfo viewport_state = { VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - nullptr, + &depth_clamp_state, 0, // VkPipelineViewportStateCreateFlags flags; 1, // uint32_t viewportCount &viewport, // const VkViewport* pViewports diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index ed2407775a..ee2f104efd 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -471,6 +471,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsDynamicVertexLoader = true; // Assumed support. config->backend_info.bSupportsVSLinePointExpand = true; // Assumed support. config->backend_info.bSupportsHDROutput = true; // Assumed support. + config->backend_info.bSupportsUnrestrictedDepthRange = false; // Dependent on features. 
+ config->backend_info.bSupportsDepthClampControl = false; // Dependent on features. } void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) @@ -676,6 +678,13 @@ bool VulkanContext::SelectDeviceExtensions(bool enable_surface) AddExtension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false); AddExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, false); + if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_D32F_CLEAR)) + g_Config.backend_info.bSupportsUnrestrictedDepthRange = + AddExtension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, false); + if (g_Config.backend_info.bSupportsUnrestrictedDepthRange) + g_Config.backend_info.bSupportsDepthClampControl = + AddExtension(VK_EXT_DEPTH_CLAMP_CONTROL_EXTENSION_NAME, false); + return true; } diff --git a/Source/Core/VideoCommon/AbstractGfx.cpp b/Source/Core/VideoCommon/AbstractGfx.cpp index 4c386b12ca..d17b2511e3 100644 --- a/Source/Core/VideoCommon/AbstractGfx.cpp +++ b/Source/Core/VideoCommon/AbstractGfx.cpp @@ -75,7 +75,9 @@ void AbstractGfx::ClearRegion(const MathUtil::Rectangle& target_rc, bool co static_cast((color >> 8) & 0xFF) / 255.0f, static_cast((color >> 0) & 0xFF) / 255.0f, static_cast((color >> 24) & 0xFF) / 255.0f}, - static_cast(z & 0xFFFFFF) / 16777216.0f}; + static_cast(z & 0xFFFFFF)}; + if (!g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + uniforms.clear_depth = uniforms.clear_depth / 16777216.0f; if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) uniforms.clear_depth = 1.0f - uniforms.clear_depth; g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); diff --git a/Source/Core/VideoCommon/BPFunctions.cpp b/Source/Core/VideoCommon/BPFunctions.cpp index 2c6c733710..0fbd32a9bc 100644 --- a/Source/Core/VideoCommon/BPFunctions.cpp +++ b/Source/Core/VideoCommon/BPFunctions.cpp @@ -203,8 +203,8 @@ void SetScissorAndViewport() float y = g_framebuffer_manager->EFBToScaledYf(raw_y); float width = 
g_framebuffer_manager->EFBToScaledXf(raw_width); float height = g_framebuffer_manager->EFBToScaledYf(raw_height); - float min_depth = (xfmem.viewport.farZ - xfmem.viewport.zRange) / 16777216.0f; - float max_depth = xfmem.viewport.farZ / 16777216.0f; + float min_depth = (xfmem.viewport.farZ - xfmem.viewport.zRange); + float max_depth = xfmem.viewport.farZ; if (width < 0.f) { x += width; @@ -220,7 +220,7 @@ void SetScissorAndViewport() // This is necessary because we use a 2^24 divisor for all our depth values to prevent // floating-point round-trip errors. However the console GPU doesn't ever write a value // to the depth buffer that exceeds 2^24 - 1. - constexpr float GX_MAX_DEPTH = 16777215.0f / 16777216.0f; + constexpr float GX_MAX_DEPTH = 16777215.0f; if (!g_ActiveConfig.backend_info.bSupportsDepthClamp) { // There's no way to support oversized depth ranges in this situation. Let's just clamp the @@ -245,6 +245,12 @@ void SetScissorAndViewport() } } + if (!g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + { + min_depth /= 16777216.0f; + max_depth /= 16777216.0f; + } + float near_depth, far_depth; if (g_ActiveConfig.backend_info.bSupportsReversedDepthRange) { diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp index 044b94a5de..a2b630ef4a 100644 --- a/Source/Core/VideoCommon/FramebufferManager.cpp +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -290,9 +290,11 @@ bool FramebufferManager::CreateEFBFramebuffer() } // Clear the renderable textures out. - g_gfx->SetAndClearFramebuffer(m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}}, - g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : - 0.0f); + g_gfx->SetAndClearFramebuffer( + m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}}, + !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 0.0f : + g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange ? 
16777215.0f : + 1.0f); // Pixel Shader uses EFB scale as a constant, dirty that in case it changed Core::System::GetInstance().GetPixelShaderManager().Dirty(); @@ -1154,9 +1156,11 @@ void FramebufferManager::DoLoadState(PointerWrap& p) color_tex->texture->GetLayers() != m_efb_color_texture->GetLayers()) { WARN_LOG_FMT(VIDEO, "Failed to deserialize EFB contents. Clearing instead."); - g_gfx->SetAndClearFramebuffer(m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}}, - g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : - 0.0f); + g_gfx->SetAndClearFramebuffer( + m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}}, + !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 0.0f : + g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange ? 16777215.0f : + 1.0f); return; } diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index a76681634f..f365ad1cf3 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -1260,7 +1260,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos } else { - if (!host_config.backend_reversed_depth_range) + if (host_config.backend_unrestricted_depth_range) + out.Write("\tint zCoord = int(rawpos.z);\n"); + else if (!host_config.backend_reversed_depth_range) out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); @@ -1277,7 +1279,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos uid_data->ztest == EmulatedZ::EarlyWithZComplocHack; if (uid_data->per_pixel_depth && early_ztest) { - if (!host_config.backend_reversed_depth_range) + if (host_config.backend_unrestricted_depth_range) + out.Write("\tdepth = float(zCoord);\n"); + else if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 
16777216.0;\n"); @@ -1298,7 +1302,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos if (uid_data->per_pixel_depth && uid_data->ztest == EmulatedZ::Late) { - if (!host_config.backend_reversed_depth_range) + if (host_config.backend_unrestricted_depth_range) + out.Write("\tdepth = float(zCoord);\n"); + else if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); @@ -1923,7 +1929,9 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat if (per_pixel_depth) { out.Write("\t\tdepth = {};\n", - !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : "1.0"); + !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : + g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange ? "16777215.0" : + "1.0"); } // ZCOMPLOC HACK: diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 61820e0c82..dadb2158b3 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -90,9 +90,13 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) float depth = g_framebuffer_manager->PeekEFBDepth(x, y); if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) depth = 1.0f - depth; + if (g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + depth = static_cast(depth); + else + depth = static_cast(depth * 16777216.0f); // Convert to 24bit depth - u32 z24depth = std::clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); + u32 z24depth = std::clamp(depth, 0, 0xFFFFFF); if (bpmem.zcontrol.pixel_format == PixelFormat::RGB565_Z16) { @@ -132,7 +136,9 @@ void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num { // Convert to floating-point depth. 
const EfbPokeData& point = points[i]; - float depth = float(point.data & 0xFFFFFF) / 16777216.0f; + float depth = float(point.data & 0xFFFFFF); + if (!g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + depth = depth / 16777216.0f; if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) depth = 1.0f - depth; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index d132847f14..5e3ad9b496 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -48,6 +48,8 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader; bits.backend_vs_point_line_expand = g_ActiveConfig.UseVSForLinePointExpand(); bits.backend_gl_layer_in_fs = g_ActiveConfig.backend_info.bSupportsGLLayerInFS; + bits.backend_unrestricted_depth_range = + g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange; return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 4723cbfc79..7c01bfe320 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -179,6 +179,7 @@ union ShaderHostConfig BitField<27, 1, bool, u32> backend_dynamic_vertex_loader; BitField<28, 1, bool, u32> backend_vs_point_line_expand; BitField<29, 1, bool, u32> backend_gl_layer_in_fs; + BitField<30, 1, bool, u32> backend_unrestricted_depth_range; static ShaderHostConfig GetCurrent(); }; diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 6962daa29b..3a0897c764 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -127,8 +127,11 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) code.Write(" 
tex_sample.x = 1.0 - tex_sample.x;\n"); - code.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n" - " return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n" + if (g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + code.Write(" uint depth = uint(tex_sample.x);\n"); + else + code.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n"); + code.Write(" return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n" "}}\n"); } else diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index da2e247a7f..baf3e89a6d 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -113,8 +113,11 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) out.Write(" tex_sample.x = 1.0 - tex_sample.x;\n"); - out.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n" - " return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n" + if (g_ActiveConfig.backend_info.bSupportsUnrestrictedDepthRange) + out.Write(" uint depth = uint(tex_sample.x);\n"); + else + out.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n"); + out.Write(" return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n" "}}\n"); } else diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index ca8f42ec8f..89151c0a9a 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -1326,7 +1326,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, if (host_config.fast_depth_calc) { - if (!host_config.backend_reversed_depth_range) + if (host_config.backend_unrestricted_depth_range) + out.Write(" int zCoord = int(rawpos.z);\n"); + else if 
(!host_config.backend_reversed_depth_range) out.Write(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else out.Write(" int zCoord = int(rawpos.z * 16777216.0);\n"); @@ -1382,7 +1384,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write(" // If early depth is enabled, write to zbuffer before depth textures\n" " // If early depth isn't enabled, we write to the zbuffer here\n" " int zbuffer_zCoord = bpmem_late_ztest ? zCoord : early_zCoord;\n"); - if (!host_config.backend_reversed_depth_range) + if (host_config.backend_unrestricted_depth_range) + out.Write(" depth = float(zbuffer_zCoord);\n"); + else if (!host_config.backend_reversed_depth_range) out.Write(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n"); else out.Write(" depth = float(zbuffer_zCoord) / 16777216.0;\n"); diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index a6c0502dfe..796a57d041 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -433,20 +433,34 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ } } - // Write the true depth value. If the game uses depth textures, then the pixel shader will - // override it with the correct values if not then early z culling will improve speed. - // There are two different ways to do this, when the depth range is oversized, we process - // the depth range in the vertex shader, if not we let the host driver handle it. + // Write the inverted depth value to map the -1..0 clip-space range to the expected 0..1 range. + // If the game uses depth textures, then the pixel shader will override it with the correct values + // if not then early z culling will improve speed. // - // Adjust z for the depth range. We're using an equation which incorperates a depth inversion, - // so we can map the console -1..0 range to the 0..1 range used in the depth buffer. 
- // We have to handle the depth range in the vertex shader instead of after the perspective - // divide, because some games will use a depth range larger than what is allowed by the - // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these - // games effectively add a depth bias to the values written to the depth buffer. + // The depth range can also be oversized beyond the range supported by the depth buffer. The final + // depth value will still be clamped to the 0..2^24-1 range, so these games effectively add a + // depth bias to the values written to the depth buffer. + // + // If an unrestricted depth range is supported then we can let host driver handle the oversized + // depth range. This can only work if the host driver also supports a feature to allow us to + // clamp any depth values that are beyond the supported 0..2^24-1 of the depth buffer. + // + // If only a depth range of 0..1 is supported then we process the depth equation in the vertex + // shader and handle the depth clamp by setting the depth range to 0..(2^24-1)/(2^24). + // + // If the depth range is not oversized or when we let the host driver handle the oversized depth + // range then the constants in this equation will be set so that z = -z. out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); + if (host_config.backend_unrestricted_depth_range) + { + // If we don't use normalization then we can add a small depth bias to influence rounding + // behaviour since the console expects the depth value to be truncated before being added + // to the far value of the depth range. 
+ out.Write("o.pos.z += (0.5 / 16777216.0);\n"); + } + if (!host_config.backend_clip_control) { // If the graphics API doesn't support a depth range of 0..1, then we need to map z to diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 4a46834c14..c41086fccb 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -600,24 +600,38 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("o.pos.z = o.pos.z * (1.0 - 1e-7);\n"); } - // Write the true depth value. If the game uses depth textures, then the pixel shader will - // override it with the correct values if not then early z culling will improve speed. - // There are two different ways to do this, when the depth range is oversized, we process - // the depth range in the vertex shader, if not we let the host driver handle it. + // Write the inverted depth value to map the -1..0 clip-space range to the expected 0..1 range. + // If the game uses depth textures, then the pixel shader will override it with the correct values + // if not then early z culling will improve speed. // - // Adjust z for the depth range. We're using an equation which incorperates a depth inversion, - // so we can map the console -1..0 range to the 0..1 range used in the depth buffer. - // We have to handle the depth range in the vertex shader instead of after the perspective - // divide, because some games will use a depth range larger than what is allowed by the - // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these - // games effectively add a depth bias to the values written to the depth buffer. + // The depth range can also be oversized beyond the range supported by the depth buffer. The final + // depth value will still be clamped to the 0..2^24-1 range, so these games effectively add a + // depth bias to the values written to the depth buffer. 
+ // + // If an unrestricted depth range is supported then we can let host driver handle the oversized + // depth range. This can only work if the host driver also supports a feature to allow us to + // clamp any depth values that are beyond the supported 0..2^24-1 of the depth buffer. + // + // If only a depth range of 0..1 is supported then we process the depth equation in the vertex + // shader and handle the depth clamp by setting the depth range to 0..(2^24-1)/(2^24). + // + // If the depth range is not oversized or when we let the host driver handle the oversized depth + // range then the constants in this equation will be set so that z = -z. out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); + if (host_config.backend_unrestricted_depth_range) + { + // If we don't use normalization then we can add a small depth bias to influence rounding + // behaviour since the console expects the depth value to be truncated before being added + // to the far value of the depth range. + out.Write("o.pos.z += (0.5 / 16777216.0);\n"); + } + if (!host_config.backend_clip_control) { // If the graphics API doesn't support a depth range of 0..1, then we need to map z to - // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point + // the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point // operation that can introduce a round-trip error. 
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); } diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index 0415ac05dc..7860d292a3 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -139,6 +139,10 @@ void VertexShaderManager::SetProjectionMatrix(XFStateManager& xf_state_manager) bool VertexShaderManager::UseVertexDepthRange() { + // Backend has full native support for the depth range including clamping the depth. + if (g_ActiveConfig.backend_info.bSupportsDepthClampControl) + return false; + // We can't compute the depth range in the vertex shader if we don't support depth clamp. if (!g_ActiveConfig.backend_info.bSupportsDepthClamp) return false; diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 5cce23bfd2..d6cb8fdab0 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -338,6 +338,8 @@ struct VideoConfig final bool bSupportsVSLinePointExpand = false; bool bSupportsGLLayerInFS = true; bool bSupportsHDROutput = false; + bool bSupportsUnrestrictedDepthRange = false; + bool bSupportsDepthClampControl = false; } backend_info; // Utility