From 24e5d21780c6ece01ef8a2315eb9ed5300535688 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 17 Jan 2016 01:41:26 +1300 Subject: [PATCH] Multithreaded Shadergen: Second pass over Pixel Shadergen. Note: It's not 100% perfect, as some of the GPU capabilities leak into the pixel shader UID. Currently our UIDs don't get exported, so there is no issue. But someone might want to fix this in the future. --- .../VideoBackends/D3D/PixelShaderCache.cpp | 6 +- .../Core/VideoBackends/D3D12/ShaderCache.cpp | 6 +- Source/Core/VideoBackends/Null/ShaderCache.h | 6 +- .../VideoBackends/OGL/ProgramShaderCache.cpp | 6 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 393 ++++++++++-------- Source/Core/VideoCommon/PixelShaderGen.h | 9 +- 6 files changed, 238 insertions(+), 188 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 3d48784647..e739c6ae06 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -556,10 +556,10 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) { - PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode, API_D3D); + PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData()); pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); } @@ -588,7 +588,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) } // Need to compile a new shader - ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData()); D3DBlob* pbytecode; if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode)) diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp 
b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 5c4cf17962..5fe90bd410 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -163,7 +163,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g SetCurrentPrimitiveTopology(gs_primitive_type); GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type); - PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); + PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode); VertexShaderUid vs_uid = GetVertexShaderUid(); bool gs_changed = gs_uid != s_last_geometry_shader_uid; @@ -263,7 +263,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData()); s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p"); } @@ -275,7 +275,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ } else { - ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData()); ID3DBlob* ps_bytecode = nullptr; if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode)) diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index 9cdc57c892..7cd3e2863e 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -81,12 +81,12 @@ protected: PixelShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type) override { - return GetPixelShaderUid(dst_alpha_mode, api_type); + return GetPixelShaderUid(dst_alpha_mode); } ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, - 
PixelShaderUid) override + PixelShaderUid uid) override { - return GeneratePixelShaderCode(dst_alpha_mode, api_type); + return GeneratePixelShaderCode(dst_alpha_mode, api_type, uid.GetUidData()); } }; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 5d9da051bb..2a8accd5b9 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -210,7 +210,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ newentry.in_cache = 0; ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData()); - ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); + ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid.puid.GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) @@ -397,13 +397,13 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type) { - uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL); + uid->puid = GetPixelShaderUid(dstAlphaMode); uid->vuid = GetVertexShaderUid(); uid->guid = GetGeometryShaderUid(primitive_type); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); + ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid->puid.GetUidData()); pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData()); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 006fa7584a..a94cd02c15 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -151,28 +151,14 @@ static const 
char* tevRasTable[] = { static const char* tevCOutputTable[] = {"prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb"}; static const char* tevAOutputTable[] = {"prev.a", "c0.a", "c1.a", "c2.a"}; -template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType); -template -static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift); -template -static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, - bool stereo, API_TYPE ApiType); -template -static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType, - DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -template -static void WriteFog(T& out, pixel_shader_uid_data* uid_data); - -template -static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) +// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support) +// leak +// into this UID; This is really unhelpful if these UIDs ever move from one machine to +// another. +PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode) { - T out; - // Non-uid template parameters will write to the dummy data (=> gets optimized out) - pixel_shader_uid_data dummy_data; - pixel_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data == nullptr) - uid_data = &dummy_data; + PixelShaderUid out; + pixel_shader_uid_data* uid_data = out.GetUidData(); memset(uid_data, 0, sizeof(*uid_data)); uid_data->dstAlphaMode = dstAlphaMode; @@ -185,6 +171,186 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) u32 numStages = uid_data->genMode_numtevstages + 1; + const bool forced_early_z = + g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && + (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) + // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. 
+ // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. + && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); + const bool per_pixel_depth = + (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || + (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || + (bpmem.zmode.testenable && bpmem.genMode.zfreeze); + + uid_data->per_pixel_depth = per_pixel_depth; + uid_data->forced_early_z = forced_early_z; + uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; + uid_data->msaa = g_ActiveConfig.iMultisamples > 1; + uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; + uid_data->stereo = g_ActiveConfig.iStereoMode > 0; + + if (!uid_data->forced_early_z && bpmem.UseEarlyDepthTest() && + (!uid_data->fast_depth_calc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)) + { + static bool warn_once = true; + if (warn_once) + WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current " + "configuration. Make sure to enable fast depth calculations. If this message " + "still shows up your hardware isn't able to emulate the feature properly (a " + "GPU with D3D 11.0 / OGL 4.2 support is required)."); + warn_once = false; + } + + if (uid_data->per_pixel_lighting) + { + // The lighting shader only needs the two color bits of the 23bit component bit array. 
+ uid_data->components = + (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; + ; + GetLightingShaderUid(uid_data->lighting); + } + + if (uid_data->genMode_numtexgens > 0) + { + for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) + { + // optional perspective divides + uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; + } + } + + // indirect texture map lookup + int nIndirectStagesUsed = 0; + if (uid_data->genMode_numindstages > 0) + { + for (unsigned int i = 0; i < numStages; ++i) + { + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + + uid_data->nIndirectStagesUsed = nIndirectStagesUsed; + for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) + { + if (uid_data->nIndirectStagesUsed & (1 << i)) + uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i)); + } + + for (unsigned int n = 0; n < numStages; n++) + { + int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); + bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; + // HACK to handle cases where the tex gen is not enabled + if (!bHasTexCoord) + texcoord = bpmem.genMode.numtexgens; + + uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; + uid_data->stagehash[n].tevorders_texcoord = texcoord; + if (uid_data->stagehash[n].hasindstage) + uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; + + TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC; + TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC; + uid_data->stagehash[n].cc = cc.hex & 0xFFFFFF; + uid_data->stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later + + if (cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || + cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC || + cc.d == TEVCOLORARG_RASA || 
cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA || + ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) + { + const int i = bpmem.combiners[n].alphaC.rswap; + uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; + uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; + uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; + uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2; + uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); + } + + uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); + if (uid_data->stagehash[n].tevorders_enable) + { + const int i = bpmem.combiners[n].alphaC.tswap; + uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; + uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; + uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; + uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; + uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + } + + if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || + cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || + ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) + { + uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1); + uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1); + } + } + +#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) + uid_data->num_values = (uid_data->per_pixel_lighting) ? 
+ sizeof(*uid_data) : + MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); + + AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); + uid_data->Pretest = Pretest; + uid_data->late_ztest = bpmem.UseLateDepthTest(); + + // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled + // (in this case we need to write a depth value if depth test passes regardless of the alpha + // testing result) + if (uid_data->Pretest == AlphaTest::UNDETERMINED || + (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) + { + uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; + uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; + uid_data->alpha_test_logic = bpmem.alpha_test.logic; + + // ZCOMPLOC HACK: + // The only way to emulate alpha test + early-z is to force early-z in the shader. + // As this isn't available on all drivers and as we can't emulate this feature otherwise, + // we are only able to choose which one we want to respect more. + // Tests seem to have proven that writing depth even when the alpha test fails is more + // important that a reliable alpha test, so we just force the alpha test to always succeed. + // At least this seems to be less buggy. 
+ uid_data->alpha_test_use_zcomploc_hack = + bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && + !g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze; + } + + uid_data->zfreeze = bpmem.genMode.zfreeze; + uid_data->ztex_op = bpmem.ztex2.op; + uid_data->early_ztest = bpmem.UseEarlyDepthTest(); + uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; + + if (dstAlphaMode != DSTALPHA_ALPHA_PASS) + { + uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; + uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; + uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; + } + + return out; +} + +static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, + API_TYPE ApiType); +static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp, + int shift); +static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap, + bool stereo, API_TYPE ApiType); +static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType, + DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); + +ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, + const pixel_shader_uid_data* uid_data) +{ + ShaderCode out; + + u32 numStages = uid_data->genMode_numtevstages + 1; + out.Write("//Pixel Shader for TEV stages\n"); out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens, uid_data->genMode_numindstages); @@ -281,27 +447,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, - uid_data->per_pixel_lighting, ""); + GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, + ""); out.Write("};\n"); - { - const bool forced_early_z = - 
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && - (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) - // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. - // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. - && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); - const bool per_pixel_depth = - (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || - (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || - (bpmem.zmode.testenable && bpmem.genMode.zfreeze); - - uid_data->per_pixel_depth = per_pixel_depth; - uid_data->forced_early_z = forced_early_z; - uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; - } - if (uid_data->forced_early_z) { // Zcomploc (aka early_ztest) is a way to control whether depth test is done before @@ -360,9 +509,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) warn_once = false; } - uid_data->msaa = g_ActiveConfig.iMultisamples > 1; - uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; - uid_data->stereo = g_ActiveConfig.iStereoMode > 0; if (ApiType == API_OPENGL) { out.Write("out vec4 ocol0;\n"); @@ -375,7 +521,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("in VertexData {\n"); - GenerateVSOutputMembers( + GenerateVSOutputMembers( out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); @@ -478,20 +624,13 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) "\tfloat3 ldir, h, cosAttn, distAttn;\n" "\tfloat dist, dist2, attn;\n"); - // The lighting shader only needs the two color bits of the 23bit component bit array. 
- uid_data->components = - (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; - ; - // TODO: Our current constant usage code isn't able to handle more than one buffer. // So we can't mark the VS constant as used here. But keep them here as reference. // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - - // FIXME: Disabled until pixelshadergen is split - // GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, - // "colors_", "col"); + GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, + "colors_", "col"); } // HACK to handle cases where the tex gen is not enabled @@ -506,7 +645,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) { out.Write("\tint2 fixpoint_uv%d = itrunc(", i); // optional perspective divides - uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) { out.Write("(uv%d.z == 0.0 ? 
uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i); @@ -520,24 +658,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } } - // indirect texture map lookup - int nIndirectStagesUsed = 0; - if (uid_data->genMode_numindstages > 0) - { - for (unsigned int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } - - uid_data->nIndirectStagesUsed = nIndirectStagesUsed; for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) { if (uid_data->nIndirectStagesUsed & (1 << i)) { - uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i)); - unsigned int texcoord = uid_data->GetTevindirefCoord(i); unsigned int texmap = uid_data->GetTevindirefMap(i); @@ -551,17 +675,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\ttempcoord = int2(0, 0);\n"); out.Write("\tint3 iindtex%d = ", i); - SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); + SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); } } for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, uid_data, i, ApiType); // build the equation for this stage - -#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) - uid_data->num_values = (uid_data->per_pixel_lighting) ? 
- sizeof(*uid_data) : - MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); + WriteStage(out, uid_data, i, ApiType); // build the equation for this stage { // The results of the last texenv stage are put onto the screen, @@ -581,18 +700,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("\tprev = prev & 255;\n"); - AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); - uid_data->Pretest = Pretest; - uid_data->late_ztest = bpmem.UseLateDepthTest(); - // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // (in this case we need to write a depth value if depth test passes regardless of the alpha // testing result) if (uid_data->Pretest == AlphaTest::UNDETERMINED || (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) - WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); - - uid_data->zfreeze = bpmem.genMode.zfreeze; + WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); if (uid_data->zfreeze) { @@ -629,10 +742,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); - uid_data->ztex_op = bpmem.ztex2.op; - uid_data->early_ztest = bpmem.UseEarlyDepthTest(); - uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; - // depth texture can safely be ignored if the result won't be written to the depth buffer // (early_ztest) and isn't used for fog either const bool skip_ztexture = !uid_data->per_pixel_depth && !uid_data->fog_fsel; @@ -674,7 +783,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } else { - WriteFog(out, uid_data); + WriteFog(out, uid_data); out.Write("\tocol0 = float4(prev) / 255.0;\n"); } @@ -704,25 +813,20 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) return out; } -template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE 
ApiType) +static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, + API_TYPE ApiType) { - int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); - bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; - bool bHasIndStage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; + auto& stage = uid_data->stagehash[n]; + out.Write("\n\t// TEV stage %d\n", n); + // HACK to handle cases where the tex gen is not enabled + u32 texcoord = stage.tevorders_texcoord; + bool bHasTexCoord = texcoord < uid_data->genMode_numtexgens; if (!bHasTexCoord) texcoord = 0; - out.Write("\n\t// TEV stage %d\n", n); - - auto& stage = uid_data->stagehash[n]; - - uid_data->stagehash[n].hasindstage = bHasIndStage; - uid_data->stagehash[n].tevorders_texcoord = texcoord; if (stage.hasindstage) { - uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; TevStageIndirect tevind; tevind.hex = stage.tevind; @@ -785,7 +889,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, - stage.tevorders_texcoord, n); + texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); @@ -798,7 +902,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, - stage.tevorders_texcoord, n); + texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); @@ -823,20 +927,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE // wrap S if (tevind.sw == ITW_OFF) - out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", stage.tevorders_texcoord); + out.Write("\twrappedcoord.x = 
fixpoint_uv%d.x;\n", texcoord); else if (tevind.sw == ITW_0) out.Write("\twrappedcoord.x = 0;\n"); else - out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", stage.tevorders_texcoord, + out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", texcoord, tevIndWrapStart[tevind.sw]); // wrap T if (tevind.tw == ITW_OFF) - out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", stage.tevorders_texcoord); + out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord); else if (tevind.tw == ITW_0) out.Write("\twrappedcoord.y = 0;\n"); else - out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", stage.tevorders_texcoord, + out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", texcoord, tevIndWrapStart[tevind.tw]); if (tevind.fb_addprev) // add previous tevcoord @@ -848,10 +952,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); } - uid_data->stagehash[n].cc = bpmem.combiners[n].colorC.hex & 0xFFFFFF; - uid_data->stagehash[n].ac = - bpmem.combiners[n].alphaC.hex & 0xFFFFF0; // Storing rswap and tswap later - TevStageCombiner::ColorCombiner cc; TevStageCombiner::AlphaCombiner ac; cc.hex = stage.cc; @@ -862,35 +962,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { - const int i = bpmem.combiners[n].alphaC.rswap; - uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap; - ac.rswap = bpmem.combiners[n].alphaC.rswap; - uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; - uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; - uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; - uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2; - uid_data->stagehash[n].tevorders_colorchan = 
bpmem.tevorders[n / 2].getColorChan(n & 1); - + // Generate swizzle string to represent the Ras color channel swapping char rasswap[5] = {"rgba"[stage.tevksel_swap1a], "rgba"[stage.tevksel_swap2a], "rgba"[stage.tevksel_swap1b], "rgba"[stage.tevksel_swap2b], '\0'}; out.Write("\trastemp = %s.%s;\n", tevRasTable[stage.tevorders_colorchan], rasswap); } - uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); if (stage.tevorders_enable) { - int texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); - const int i = bpmem.combiners[n].alphaC.tswap; - uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2; - ac.tswap = bpmem.combiners[n].alphaC.tswap; - uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; - uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; - uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; - uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; - uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + // Generate swizzle string to represent the texture color channel swapping + char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c], + "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'}; - if (!bHasIndStage) + if (!stage.hasindstage) { // calc tevcord if (bHasTexCoord) @@ -898,12 +983,9 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE else out.Write("\ttevcoord.xy = int2(0, 0);\n"); } - - char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c], - "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'}; - out.Write("\ttextemp = "); - SampleTexture(out, "float2(tevcoord.xy)", texswap, texmap, uid_data->stereo, ApiType); + SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, uid_data->stereo, + ApiType); } else { @@ -914,8 +996,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, 
API_TYPE cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) { - uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1); - uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1); out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[stage.tevksel_kc], tevKSelTableA[stage.tevksel_ka]); @@ -1014,8 +1094,8 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write(";\n"); } -template -static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift) +static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp, + int shift) { const char* tevScaleTableLeft[] = { "", // SCALE_1 @@ -1061,8 +1141,7 @@ static void WriteTevRegular(T& out, const char* components, int bias, int op, in out.Write(")%s", tevScaleTableRight[shift]); } -template -static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, +static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap, bool stereo, API_TYPE ApiType) { out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap); @@ -1094,8 +1173,7 @@ static const char* tevAlphaFunclogicTable[] = { " == " // xnor }; -template -static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType, +static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) { static const char* alphaRef[2] = {I_ALPHA ".r", I_ALPHA ".g"}; @@ -1107,10 +1185,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api else out.Write("\tif(!( "); - uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; - uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; - uid_data->alpha_test_logic = bpmem.alpha_test.logic; - // Lookup the first component from the alpha 
function table int compindex = uid_data->alpha_test_comp0; out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); @@ -1133,16 +1207,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api out.Write("\t\tdepth = %s;\n", (ApiType == API_D3D) ? "0.0" : "1.0"); // ZCOMPLOC HACK: - // The only way to emulate alpha test + early-z is to force early-z in the shader. - // As this isn't available on all drivers and as we can't emulate this feature otherwise, - // we are only able to choose which one we want to respect more. - // Tests seem to have proven that writing depth even when the alpha test fails is more - // important that a reliable alpha test, so we just force the alpha test to always succeed. - // At least this seems to be less buggy. - uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && - !g_ActiveConfig.backend_info.bSupportsEarlyZ && - !bpmem.genMode.zfreeze; - if (!uid_data->alpha_test_use_zcomploc_hack) { out.Write("\t\tdiscard;\n"); @@ -1164,15 +1228,11 @@ static const char* tevFogFuncsTable[] = { "\tfog = 1.0 - fog;\n fog = exp2(-8.0 * fog * fog);\n" // backward exp2 }; -template -static void WriteFog(T& out, pixel_shader_uid_data* uid_data) +static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) { - uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; if (uid_data->fog_fsel == 0) return; // no Fog - uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; - out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); out.SetConstantsUsed(C_FOGI, C_FOGI); out.SetConstantsUsed(C_FOGF, C_FOGF + 1); @@ -1198,7 +1258,6 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) // ze *= x_adjust // TODO Instead of this theoretical calculation, we should use the // coefficient table given in the fog range BP registers! 
- uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if (uid_data->fog_RangeBaseEnabled) { out.SetConstantsUsed(C_FOGF, C_FOGF); @@ -1223,13 +1282,3 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) out.Write("\tint ifog = iround(fog * 256.0);\n"); out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); } - -PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) -{ - return GeneratePixelShader(dstAlphaMode, ApiType); -} - -ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) -{ - return GeneratePixelShader(dstAlphaMode, ApiType); -} diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index ed4299e9de..5b02171129 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -87,7 +87,7 @@ struct pixel_shader_uid_data } } - inline u32 GetTevindirefCoord(int index) + inline u32 GetTevindirefCoord(int index) const { if (index == 0) { @@ -108,7 +108,7 @@ struct pixel_shader_uid_data return 0; } - inline u32 GetTevindirefMap(int index) + inline u32 GetTevindirefMap(int index) const { if (index == 0) { @@ -165,5 +165,6 @@ struct pixel_shader_uid_data typedef ShaderUid PixelShaderUid; -ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); -PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); +ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, + const pixel_shader_uid_data* uid_data); +PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode);