From db6e928c8d4dab785cb7ea2c04e8f03325badc8d Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 12 Jun 2022 21:43:00 -0500 Subject: [PATCH 1/2] VideoCommon: Fix Intel GPUs on Metal/Vulkan locking up in ubershaders --- Source/Core/VideoBackends/Vulkan/VulkanContext.cpp | 4 ++++ Source/Core/VideoCommon/DriverDetails.cpp | 2 ++ Source/Core/VideoCommon/DriverDetails.h | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 0a82d6451f..1dc02e689a 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -377,6 +377,10 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD // We will use shader blending, so disable hardware dual source blending. config->backend_info.bSupportsDualSourceBlend = false; } + + // Dynamic sampler indexing locks up Intel GPUs on MoltenVK/Metal + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING)) + config->backend_info.bSupportsDynamicSamplerIndexing = false; } void VulkanContext::PopulateBackendInfoMultisampleModes( diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 6d7749c949..35e0808469 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -142,6 +142,8 @@ constexpr BugInfo m_known_bugs[] = { -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_APPLE, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_DISCARD_WITH_EARLY_Z, -1.0, -1.0, true}, + {API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN, + BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true}, }; static std::map<Bug, BugInfo> m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 70c39450c9..80159495ee 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ 
b/Source/Core/VideoCommon/DriverDetails.h @@ -328,6 +328,12 @@ enum Bug // Started version: -1 // Ended version: -1 BUG_BROKEN_DISCARD_WITH_EARLY_Z, + + // BUG: Using dynamic sampler indexing locks up the GPU + // Affected devices: Intel (macOS Metal) + // Started version: -1 + // Ended version: -1 + BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, }; // Initializes our internal vendor, device family, and driver version From 25929789c1e80d7a22d6acdb14082bc44e4fb0e9 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 12 Jun 2022 21:31:16 -0500 Subject: [PATCH 2/2] VideoCommon: Don't pass State by inout Spirv-cross's MSL codegen makes the amazing choice of compiling calls to inout functions as `State temp = s; call_function(temp); s = temp`. Not all Metal backends handle this mess well. In particular, it causes register spills on Intel, losing about 5% in performance. --- Source/Core/VideoCommon/UberShaderPixel.cpp | 23 +++++++-------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 161f40146a..29d52655b2 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -502,14 +502,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "\n" "int4 getTevReg(in State s, uint index) {{\n"); WriteSwitch(out, api_type, "index", tev_regs_lookup_table, 2, false); - out.Write("}}\n" - "\n" - "void setRegColor(inout State s, uint index, int3 color) {{\n"); - WriteSwitch(out, api_type, "index", tev_c_set_table, 2, true); - out.Write("}}\n" - "\n" - "void setRegAlpha(inout State s, uint index, int alpha) {{\n"); - WriteSwitch(out, api_type, "index", tev_a_set_table, 2, true); out.Write("}}\n" "\n"); @@ -861,9 +853,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " else\n" " color = clamp(color, -1024, 1023);\n" "\n" - " // Write result to the correct input 
register of the next stage\n" - " setRegColor(s, color_dest, color);\n" - "\n"); + " // Write result to the correct input register of the next stage\n"); + WriteSwitch(out, api_type, "color_dest", tev_c_set_table, 6, true); + out.Write("\n"); // Alpha combiner out.Write(" // Alpha Combiner\n"); @@ -927,11 +919,10 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " else\n" " alpha = clamp(alpha, -1024, 1023);\n" "\n" - " // Write result to the correct input register of the next stage\n" - " setRegAlpha(s, alpha_dest, alpha);\n" - " }}\n"); - - out.Write(" }} // Main TEV loop\n" + " // Write result to the correct input register of the next stage\n"); + WriteSwitch(out, api_type, "alpha_dest", tev_a_set_table, 6, true); + out.Write(" }}\n" + " }} // Main TEV loop\n" "\n"); // Select the output color and alpha registers from the last stage.