diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index cf0dc8e959..7d629f33ab 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -258,7 +258,8 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numTexgen = bpmem.genMode.numtexgens; - const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; + const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.zcontrol.early_ztest && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED); + const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || (!g_ActiveConfig.bFastDepthCalc && !forced_early_z); out.Write("//Pixel Shader for TEV stages\n"); out.Write("//%i TEV stages, %i texgens, %i IND stages\n", @@ -372,6 +373,14 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api } out.Write("float4 clipPos;\n"); } + + if (forced_early_z) + { + // HACK: This doesn't force the driver to write to depth buffer if alpha test fails. + // It just allows it, but it seems that all drivers do. + out.Write("layout(early_fragment_tests) in;\n"); + } + out.Write("void main()\n{\n"); } else @@ -584,6 +593,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api uid_data.ztex_op = bpmem.ztex2.op; uid_data.per_pixel_depth = per_pixel_depth; + uid_data.forced_early_z = forced_early_z; uid_data.fast_depth_calc = g_ActiveConfig.bFastDepthCalc; uid_data.early_ztest = bpmem.zcontrol.early_ztest; uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; @@ -1129,17 +1139,20 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE Api out.Write("\t\tdepth = 1.f;\n"); // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before - // or after texturing and alpha test. PC GPUs have no way to support this - // feature properly as of 2012: depth buffer and depth test are not + // or after texturing and alpha test. PC graphics APIs have no way to support this + // feature properly as of 2012: Depth buffer and depth test are not // programmable and the depth test is always done after texturing. - // Most importantly, PC GPUs do not allow writing to the z-buffer without + // Most importantly, they do not allow writing to the z-buffer without // writing a color value (unless color writing is disabled altogether). - // We implement "depth test before texturing" by discarding the fragment - // when the alpha test fail. This is not a correct implementation because - // even if the depth test fails the fragment could be alpha blended, but - // we don't have a choice. - uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable; - if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) + // We implement "depth test before texturing" by disabling alpha test when early-z is in use. + // It seems to be less buggy than not to update the depth buffer if alpha test fails, + // but both ways wouldn't be accurate. + + // OpenGL 4.2 has a flag which allows the driver to still update the depth buffer + // if alpha test fails. The driver doesn't have to, but I assume they all do because + // it's the much faster code path for the GPU. + uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ; + if (!uid_data.alpha_test_use_zcomploc_hack) { out.Write("\t\tdiscard;\n"); if (ApiType != API_D3D11) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 21004323f9..c1ca009438 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -112,6 +112,7 @@ struct pixel_shader_uid_data u32 fast_depth_calc : 1; u32 per_pixel_depth : 1; + u32 forced_early_z : 1; u32 early_ztest : 1; u32 xfregs_numTexGen_numTexGens : 4; diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index ec4ad3ed3d..741521c728 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -155,7 +155,8 @@ struct VideoConfig bool bSupportsPixelLighting; bool bSupportsPrimitiveRestart; bool bSupportsSeparateAlphaFunction; - bool bSupportsGLSLUBO; // needed by pixelShaderGen, so must stay in videoCommon + bool bSupportsGLSLUBO; // needed by PixelShaderGen, so must stay in VideoCommon + bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon } backend_info; // Utility diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp index 0561885b27..4e03dfb456 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp @@ -90,6 +90,7 @@ void InitBackendInfo() g_Config.backend_info.bSupportsFormatReinterpretation = true; g_Config.backend_info.bSupportsPixelLighting = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; + g_Config.backend_info.bSupportsEarlyZ = false; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp index 42011d1b9e..19e6441bbd 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp @@ -96,6 +96,7 @@ void InitBackendInfo() g_Config.backend_info.bSupportsDualSourceBlend = false; g_Config.backend_info.bSupportsFormatReinterpretation = true; g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants; + g_Config.backend_info.bSupportsEarlyZ = false; // adapters g_Config.backend_info.Adapters.clear(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index 1eed65a01e..c64ddbe1ac 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -539,7 +539,7 @@ void ProgramShaderCache::CreateHeader ( void ) "%s\n" "%s\n" - , v==GLSLES3 ? "300 es" : v==GLSL_120 ? "120" : v==GLSL_130 ? "130" : "140" + , v==GLSLES3 ? "300 es" : v==GLSL_120 ? "120" : v==GLSL_130 ? "130" : v==GLSL_140 ? "140" : "150" , v==GLSLES3 ? "precision highp float;" : "" , g_ActiveConfig.backend_info.bSupportsGLSLUBO && v