From 53c402dbc5cc25a62a1640ba8062fb8b2ebc1461 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Thu, 14 Jan 2016 18:51:37 +1300 Subject: [PATCH 01/12] Multithreaded Shadergen: First Pass over vertex/lighting Shadergens The only code which touches xfmem is code which writes directly into uid_data. All the rest now read their parameters out of uid_data. I also simplified the lighting code so it always generated separate codepaths for alpha and color channels instead of trying to combine them on the off-chance that the same equation works for all 4 channels. As modern (post 2008) GPUs generally don't calculate all 4 channels in a single vector, this optimisation is pointless. The shader compiler will undo it during the GLSL/HLSL to IR step. Bug Fix: The above optimisation was also broken, applying the color light equation to the alpha light channel instead of the alpha light equation. But it doesn't look like anything triggered this bug. --- Source/Core/VideoCommon/LightingShaderGen.h | 89 +++++--------- Source/Core/VideoCommon/PixelShaderGen.cpp | 4 +- Source/Core/VideoCommon/VertexShaderGen.cpp | 123 ++++++++++---------- 3 files changed, 90 insertions(+), 126 deletions(-) diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 136e3ec50c..7603d72dde 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -48,17 +48,15 @@ static const char s_lighting_struct[] = "struct Light {\n" template static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, - int coloralpha) + bool alpha) { - const LitChannel& chan = - (litchan_index > 1) ? xfmem.alpha[litchan_index - 2] : xfmem.color[litchan_index]; - const char* swizzle = (coloralpha == 1) ? "xyz" : (coloralpha == 2) ? "w" : "xyzw"; - const char* swizzle_components = (coloralpha == 1) ? "3" : (coloralpha == 2) ? "" : "4"; + const char* swizzle = alpha ? 
"a" : "rgb"; + const char* swizzle_components = (alpha) ? "" : "3"; - uid_data.attnfunc |= chan.attnfunc << (2 * litchan_index); - uid_data.diffusefunc |= chan.diffusefunc << (2 * litchan_index); + int attnfunc = (uid_data.attnfunc >> (2 * litchan_index)) & 0x3; + int diffusefunc = (uid_data.diffusefunc >> (2 * litchan_index)) & 0x3; - switch (chan.attnfunc) + switch (attnfunc) { case LIGHTATTN_NONE: case LIGHTATTN_DIR: @@ -73,8 +71,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, LIGHT_DIR_PARAMS(index)); object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index)); object.Write("distAttn = %s(" LIGHT_DISTATT ".xyz);\n", - (chan.diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", - LIGHT_DISTATT_PARAMS(index)); + (diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index)); object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " "float3(1.0, attn, attn*attn));\n"); break; @@ -91,11 +88,9 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index)); break; - default: - _assert_(0); } - switch (chan.diffusefunc) + switch (diffusefunc) { case LIGHTDIF_NONE: object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL ")));\n", swizzle, @@ -104,7 +99,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL ")));\n", - swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(", + swizzle, swizzle_components, diffusefunc != LIGHTDIF_SIGN ? 
"max(0.0," : "(", swizzle_components, LIGHT_COL_PARAMS(index, swizzle)); break; default: @@ -131,7 +126,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com object.Write("{\n"); uid_data.matsource |= xfmem.color[j].matsource << j; - if (color.matsource) // from vertex + bool colormatsource = !!(uid_data.matsource & (1 << j)); + if (colormatsource) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("int4 mat = int4(round(%s%d * 255.0));\n", inColorName, j); @@ -146,10 +142,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } uid_data.enablelighting |= xfmem.color[j].enablelighting << j; - if (color.enablelighting) + if (uid_data.enablelighting & (1 << j)) { uid_data.ambsource |= xfmem.color[j].ambsource << j; - if (color.ambsource) // from vertex + if (uid_data.ambsource & (1 << j)) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("lacc = int4(round(%s%d * 255.0));\n", inColorName, j); @@ -158,7 +154,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com else // TODO: this isn't verified. Here we want to read the ambient from the vertex, // but the vertex itself has no color. So we don't know which value to read. 
- // Returing 1.0 is the same as disabled lightning, so this could be fine + // Returning 1.0 is the same as disabled lightning, so this could be fine object.Write("lacc = int4(255, 255, 255, 255);\n"); } else // from color @@ -173,9 +169,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com // check if alpha is different uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2); - if (alpha.matsource != color.matsource) + bool alphamatsource = !!(uid_data.matsource & (1 << (j + 2))); + if (alphamatsource != colormatsource) { - if (alpha.matsource) // from vertex + if (alphamatsource) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("mat.w = int(round(%s%d.w * 255.0));\n", inColorName, j); @@ -191,10 +188,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2); - if (alpha.enablelighting) + if (uid_data.enablelighting & (1 << (j + 2))) { uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2); - if (alpha.ambsource) // from vertex + if (uid_data.ambsource & (1 << (j + 2))) // from vertex { if (components & (VB_HAS_COL0 << j)) object.Write("lacc.w = int(round(%s%d.w * 255.0));\n", inColorName, j); @@ -214,53 +211,23 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com object.Write("lacc.w = 255;\n"); } - if (color.enablelighting && alpha.enablelighting) + if (uid_data.enablelighting & (1 << j)) // Color lights { - // both have lighting, test if they use the same lights - int mask = 0; uid_data.attnfunc |= color.attnfunc << (2 * j); - uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2)); uid_data.diffusefunc |= color.diffusefunc << (2 * j); - uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2)); uid_data.light_mask |= color.GetFullLightMask() << (8 * j); - uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2)); - if (color.lightparams == alpha.lightparams) - { - mask = 
color.GetFullLightMask() & alpha.GetFullLightMask(); - if (mask) - { - for (int i = 0; i < 8; ++i) - { - if (mask & (1 << i)) - { - GenerateLightShader(object, uid_data, i, j, 3); - } - } - } - } - - // no shared lights for (int i = 0; i < 8; ++i) - { - if (!(mask & (1 << i)) && (color.GetFullLightMask() & (1 << i))) - GenerateLightShader(object, uid_data, i, j, 1); - if (!(mask & (1 << i)) && (alpha.GetFullLightMask() & (1 << i))) - GenerateLightShader(object, uid_data, i, j + 2, 2); - } + if (uid_data.light_mask & (1 << (i + 8 * j))) + GenerateLightShader(object, uid_data, i, j, false); } - else if (color.enablelighting || alpha.enablelighting) + if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights { - // lights are disabled on one channel so process only the active ones - const LitChannel& workingchannel = color.enablelighting ? color : alpha; - const int lit_index = color.enablelighting ? j : (j + 2); - int coloralpha = color.enablelighting ? 1 : 2; - - uid_data.light_mask |= workingchannel.GetFullLightMask() << (8 * lit_index); + uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2)); + uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2)); + uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2)); for (int i = 0; i < 8; ++i) - { - if (workingchannel.GetFullLightMask() & (1 << i)) - GenerateLightShader(object, uid_data, i, lit_index, coloralpha); - } + if (uid_data.light_mask & (1 << (i + 8 * (j + 2)))) + GenerateLightShader(object, uid_data, i, j + 2, true); } object.Write("lacc = clamp(lacc, 0, 255);\n"); object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index bc5da9a060..052bae7427 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -319,7 +319,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) // can be made and // 
doesn't define what will happen if we discard the fragment. But the way modern graphics // hardware is implemented - // means it is not unreasonable to expect the the same behaviour as early_fragment_tests. + // means it is not unreasonable to expect the same behaviour as early_fragment_tests. // We can also assume that if a driver has gone out of its way to support conservative depth and // not image_load_store // as required by OpenGL 4.2 that it will be doing the optimisation. @@ -579,7 +579,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); - // Opengl has reversed vertical screenspace coordiantes + // Opengl has reversed vertical screenspace coordinates if (ApiType == API_OPENGL) out.Write("\tscreenpos.y = %i.0 - screenpos.y;\n", EFB_HEIGHT); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 618d097370..464ce00a30 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -18,14 +18,12 @@ template static T GenerateVertexShader(API_TYPE api_type) { T out; - const u32 components = VertexLoaderManager::g_current_components; // Non-uid template parameters will write to the dummy data (=> gets optimized out) vertex_shader_uid_data dummy_data; vertex_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data != nullptr) - memset(uid_data, 0, sizeof(*uid_data)); - else + if (uid_data == nullptr) uid_data = &dummy_data; + memset(uid_data, 0, sizeof(*uid_data)); _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); @@ -46,30 +44,30 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("};\n"); uid_data->numTexGens = xfmem.numTexGen.numTexGens; - uid_data->components = components; + uid_data->components = VertexLoaderManager::g_current_components; 
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; if (api_type == API_OPENGL) { out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); - if (components & VB_HAS_POSMTXIDX) + if (uid_data->components & VB_HAS_POSMTXIDX) out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); - if (components & VB_HAS_NRM0) + if (uid_data->components & VB_HAS_NRM0) out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); - if (components & VB_HAS_NRM1) + if (uid_data->components & VB_HAS_NRM1) out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); - if (components & VB_HAS_NRM2) + if (uid_data->components & VB_HAS_NRM2) out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); - if (components & VB_HAS_COL0) + if (uid_data->components & VB_HAS_COL0) out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); - if (components & VB_HAS_COL1) + if (uid_data->components & VB_HAS_COL1) out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) { - u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); - if ((components & (VB_HAS_UV0 << i)) || hastexmtx) + u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); + if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx) out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 
3 : 2, i, SHADER_TEXTURE0_ATTRIB + i); } @@ -85,13 +83,13 @@ static T GenerateVertexShader(API_TYPE api_type) // Let's set up attributes for (u32 i = 0; i < 8; ++i) { - if (i < xfmem.numTexGen.numTexGens) + if (i < uid_data->numTexGens) { out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i); } } out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier()); - if (g_ActiveConfig.bEnablePixelLighting) + if (uid_data->pixel_lighting) { out.Write("%s out float3 Normal;\n", GetInterpolationQualifier()); out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier()); @@ -107,23 +105,23 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("VS_OUTPUT main(\n"); // inputs - if (components & VB_HAS_NRM0) + if (uid_data->components & VB_HAS_NRM0) out.Write(" float3 rawnorm0 : NORMAL0,\n"); - if (components & VB_HAS_NRM1) + if (uid_data->components & VB_HAS_NRM1) out.Write(" float3 rawnorm1 : NORMAL1,\n"); - if (components & VB_HAS_NRM2) + if (uid_data->components & VB_HAS_NRM2) out.Write(" float3 rawnorm2 : NORMAL2,\n"); - if (components & VB_HAS_COL0) + if (uid_data->components & VB_HAS_COL0) out.Write(" float4 color0 : COLOR0,\n"); - if (components & VB_HAS_COL1) + if (uid_data->components & VB_HAS_COL1) out.Write(" float4 color1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) { - u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); - if ((components & (VB_HAS_UV0 << i)) || hastexmtx) + u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); + if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx) out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 
3 : 2, i, i); } - if (components & VB_HAS_POSMTXIDX) + if (uid_data->components & VB_HAS_POSMTXIDX) out.Write(" int posmtx : BLENDINDICES,\n"); out.Write(" float4 rawpos : POSITION) {\n"); } @@ -131,26 +129,26 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("VS_OUTPUT o;\n"); // transforms - if (components & VB_HAS_POSMTXIDX) + if (uid_data->components & VB_HAS_POSMTXIDX) { out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES "[posmtx], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n"); - if (components & VB_HAS_NRMALL) + if (uid_data->components & VB_HAS_NRMALL) { out.Write("int normidx = posmtx & 31;\n"); out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"); } - if (components & VB_HAS_NRM0) + if (uid_data->components & VB_HAS_NRM0) out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, " "rawnorm0)));\n"); - if (components & VB_HAS_NRM1) + if (uid_data->components & VB_HAS_NRM1) out.Write( "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); - if (components & VB_HAS_NRM2) + if (uid_data->components & VB_HAS_NRM2) out.Write( "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); } @@ -158,21 +156,21 @@ static T GenerateVertexShader(API_TYPE api_type) { out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n"); - if (components & VB_HAS_NRM0) + if (uid_data->components & VB_HAS_NRM0) out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n"); - if (components & VB_HAS_NRM1) + if (uid_data->components & VB_HAS_NRM1) out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX 
"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n"); - if (components & VB_HAS_NRM2) + if (uid_data->components & VB_HAS_NRM2) out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n"); } - if (!(components & VB_HAS_NRM0)) + if (!(uid_data->components & VB_HAS_NRM0)) out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION @@ -183,19 +181,19 @@ static T GenerateVertexShader(API_TYPE api_type) "float dist, dist2, attn;\n"); uid_data->numColorChans = xfmem.numChan.numColorChans; - if (xfmem.numChan.numColorChans == 0) + if (uid_data->numColorChans == 0) { - if (components & VB_HAS_COL0) + if (uid_data->components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); else out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); } - GenerateLightingShader(out, uid_data->lighting, components, "color", "o.colors_"); + GenerateLightingShader(out, uid_data->lighting, uid_data->components, "color", "o.colors_"); - if (xfmem.numChan.numColorChans < 2) + if (uid_data->numColorChans < 2) { - if (components & VB_HAS_COL1) + if (uid_data->components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); else out.Write("o.colors_1 = o.colors_0;\n"); @@ -203,20 +201,21 @@ static T GenerateVertexShader(API_TYPE api_type) // transform texcoords out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); - for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) + for (unsigned int i = 0; i < uid_data->numTexGens; ++i) { - TexMtxInfo& texinfo = xfmem.texMtxInfo[i]; + auto& texinfo = uid_data->texMtxInfo[i]; out.Write("{\n"); out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); - uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow; + texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow; + texinfo.texgentype = 
xfmem.texMtxInfo[i].texgentype; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: out.Write("coord.xyz = rawpos.xyz;\n"); break; case XF_SRCNORMAL_INROW: - if (components & VB_HAS_NRM0) + if (uid_data->components & VB_HAS_NRM0) { out.Write("coord.xyz = rawnorm0.xyz;\n"); } @@ -226,20 +225,20 @@ static T GenerateVertexShader(API_TYPE api_type) texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1); break; case XF_SRCBINORMAL_T_INROW: - if (components & VB_HAS_NRM1) + if (uid_data->components & VB_HAS_NRM1) { out.Write("coord.xyz = rawnorm1.xyz;\n"); } break; case XF_SRCBINORMAL_B_INROW: - if (components & VB_HAS_NRM2) + if (uid_data->components & VB_HAS_NRM2) { out.Write("coord.xyz = rawnorm2.xyz;\n"); } break; default: _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); - if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) + if (uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); break; @@ -250,16 +249,15 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("coord.z = 1.0;\n"); // first transformation - uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype; switch (texinfo.texgentype) { case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map - if (components & (VB_HAS_NRM1 | VB_HAS_NRM2)) + if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) { // transform the light dir into tangent space - uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift; - uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; + texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift; + texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift)); out.Write( @@ -271,7 +269,7 @@ static T 
GenerateVertexShader(API_TYPE api_type) // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue // Squadron 2 //_assert_(0); // should have normals - uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; + texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } @@ -285,10 +283,10 @@ static T GenerateVertexShader(API_TYPE api_type) case XF_TEXGEN_REGULAR: default: uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; - if (components & (VB_HAS_TEXMTXIDX0 << i)) + if (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) { out.Write("int tmp = int(tex%d.z);\n", i); - if (texinfo.projection == XF_TEXPROJ_STQ) + if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n", @@ -300,7 +298,7 @@ static T GenerateVertexShader(API_TYPE api_type) } else { - if (texinfo.projection == XF_TEXPROJ_STQ) + if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES "[%d]));\n", @@ -315,18 +313,17 @@ static T GenerateVertexShader(API_TYPE api_type) uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; // CHECKME: does this only work for regular tex gen types? 
- if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) + if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) { - const PostMtxInfo& postInfo = xfmem.postMtxInfo[i]; + auto& postInfo = uid_data->postMtxInfo[i]; - uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index; - int postidx = postInfo.index; + postInfo.index = xfmem.postMtxInfo[i].index; out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n", - postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f); + postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f); - uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize; + postInfo.normalize = xfmem.postMtxInfo[i].normalize; if (postInfo.normalize) out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); @@ -342,15 +339,15 @@ static T GenerateVertexShader(API_TYPE api_type) // clipPos/w needs to be done in pixel shader, not here out.Write("o.clipPos = o.pos;\n"); - if (g_ActiveConfig.bEnablePixelLighting) + if (uid_data->pixel_lighting) { out.Write("o.Normal = _norm0;\n"); out.Write("o.WorldPos = pos.xyz;\n"); - if (components & VB_HAS_COL0) + if (uid_data->components & VB_HAS_COL0) out.Write("o.colors_0 = color0;\n"); - if (components & VB_HAS_COL1) + if (uid_data->components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); } @@ -396,10 +393,10 @@ static T GenerateVertexShader(API_TYPE api_type) { // TODO: Pass interface blocks between shader stages even if geometry shaders // are not supported, however that will require at least OpenGL 3.2 support. 
- for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) + for (unsigned int i = 0; i < uid_data->numTexGens; ++i) out.Write("uv%d.xyz = o.tex%d;\n", i, i); out.Write("clipPos = o.clipPos;\n"); - if (g_ActiveConfig.bEnablePixelLighting) + if (uid_data->pixel_lighting) { out.Write("Normal = o.Normal;\n"); out.Write("WorldPos = o.WorldPos;\n"); From 03f2c9648dba5b7919d0c34e8381a02f84851695 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Fri, 15 Jan 2016 17:51:54 +1300 Subject: [PATCH 02/12] Shader UID change: Only store the two bits of components we need. This frees up 21 bits and allows us to shorten the UID struct by an entire 32 bits. It's not strictly needed (as it's encoded into the length) but I added a bit for per-pixel lighting to make my life easier in the following commits. --- Source/Core/VideoCommon/NativeVertexFormat.h | 1 + Source/Core/VideoCommon/PixelShaderGen.cpp | 10 +++++++--- Source/Core/VideoCommon/PixelShaderGen.h | 11 ++++------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index b5d40bd54e..55f8a57483 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -31,6 +31,7 @@ enum VB_HAS_NRM2 = (1 << 12), VB_HAS_NRMALL = (7 << 10), + VB_COL_SHIFT = 13, VB_HAS_COL0 = (1 << 13), VB_HAS_COL1 = (1 << 14), diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 052bae7427..80e84a59f0 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -169,7 +169,6 @@ template static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) { T out; - const u32 components = VertexLoaderManager::g_current_components; // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; pixel_shader_uid_data* uid_data = out.template 
GetUidData(); @@ -458,13 +457,18 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) "\tfloat3 ldir, h, cosAttn, distAttn;\n" "\tfloat dist, dist2, attn;\n"); + // The lighting shader only needs the two color bits of the 23bit component bit array. + uid_data->components = + (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; + ; + // TODO: Our current constant usage code isn't able to handle more than one buffer. // So we can't mark the VS constant as used here. But keep them here as reference. // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - uid_data->components = components; - GenerateLightingShader(out, uid_data->lighting, components, "colors_", "col"); + GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, + "colors_", "col"); } // HACK to handle cases where the tex gen is not enabled diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 8a62b6bb04..9f019202fe 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -24,12 +24,11 @@ struct pixel_shader_uid_data u32 num_values; // TODO: Shouldn't be a u32 u32 NumValues() const { return num_values; } - u32 components : 23; + u32 components : 2; u32 dstAlphaMode : 2; u32 Pretest : 2; u32 nIndirectStagesUsed : 4; u32 stereo : 1; - u32 genMode_numtexgens : 4; u32 genMode_numtevstages : 4; u32 genMode_numindstages : 3; @@ -38,20 +37,20 @@ struct pixel_shader_uid_data u32 alpha_test_logic : 2; u32 alpha_test_use_zcomploc_hack : 1; u32 fog_proj : 1; + u32 fog_fsel : 3; u32 fog_RangeBaseEnabled : 1; u32 ztex_op : 2; u32 fast_depth_calc : 1; u32 per_pixel_depth : 1; + u32 per_pixel_lighting : 1; u32 forced_early_z : 1; u32 early_ztest : 1; u32 
bounding_box : 1; - - // TODO: 29 bits of padding is a waste. Can we free up some bits elseware? u32 zfreeze : 1; u32 msaa : 1; u32 ssaa : 1; - u32 pad : 29; + u32 pad : 17; u32 texMtxInfo_n_projection : 8; // 8x1 bit u32 tevindref_bi0 : 3; @@ -136,8 +135,6 @@ struct pixel_shader_uid_data u32 pad3 : 14; } stagehash[16]; - // TODO: I think we're fine without an enablePixelLighting field, should probably double check, - // though.. LightingUidData lighting; }; #pragma pack() From e99364c7c928bc4bf1002d1ef2f9e06b603be1ae Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sat, 16 Jan 2016 13:01:04 +1300 Subject: [PATCH 03/12] UID Change: Fix bug with indirect stage UIDs Bug Fix: The normal stage UIDs were randomly overwriting indirect stage texture map UID fields. It was possible for multiple shaders with different indirect texture targets to map to the same UID. Once again, it doesn't look like this bug was ever triggered. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 4 ---- Source/Core/VideoCommon/PixelShaderGen.h | 21 +-------------------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 80e84a59f0..296d947dbe 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -849,7 +849,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { const int i = bpmem.combiners[n].alphaC.rswap; - uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap; uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; @@ -875,7 +874,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE } const int i = bpmem.combiners[n].alphaC.tswap; - 
uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2; uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; @@ -884,8 +882,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); const char* texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; - uid_data->SetTevindrefTexmap(i, texmap); - out.Write("\ttextemp = "); SampleTexture(out, "float2(tevcoord.xy)", texswap, texmap, ApiType); } diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 9f019202fe..2ab23f38b4 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -85,31 +85,12 @@ struct pixel_shader_uid_data tevindref_bi4 = texmap; } } - inline void SetTevindrefTexmap(int index, u32 texmap) - { - if (index == 0) - { - tevindref_bi0 = texmap; - } - else if (index == 1) - { - tevindref_bi1 = texmap; - } - else if (index == 2) - { - tevindref_bi2 = texmap; - } - else if (index == 3) - { - tevindref_bi4 = texmap; - } - } struct { // TODO: Can save a lot space by removing the padding bits u32 cc : 24; - u32 ac : 24; + u32 ac : 24; // tswap and rswap are left blank (encoded into the tevksel fields below) u32 tevorders_texmap : 3; u32 tevorders_texcoord : 3; From 0d996f512b47baa09fa5f61759ac9d11348fccf9 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 17 Jan 2016 02:25:16 +1300 Subject: [PATCH 04/12] Multithreaded Shadergen: First pass over pixel Shadergen Bug Fix: It was theoretically possible for a shader with depth writes disabled to map to the same UID as a shader with late depth writes. No known test cases trigger this. 
--- Source/Core/VideoCommon/PixelShaderGen.cpp | 363 +++++++++++---------- Source/Core/VideoCommon/PixelShaderGen.h | 45 ++- 2 files changed, 227 insertions(+), 181 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 296d947dbe..431ebeab4e 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -152,13 +152,12 @@ static const char* tevCOutputTable[] = {"prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" static const char* tevAOutputTable[] = {"prev.a", "c0.a", "c1.a", "c2.a"}; template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType, - const char swapModeTable[4][5]); +static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType); template static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift); template static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, - API_TYPE ApiType); + bool stereo, API_TYPE ApiType); template static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); @@ -172,22 +171,23 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; pixel_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data != nullptr) - memset(uid_data, 0, sizeof(*uid_data)); - else + if (uid_data == nullptr) uid_data = &dummy_data; - - unsigned int numStages = bpmem.genMode.numtevstages + 1; - unsigned int numTexgen = bpmem.genMode.numtexgens; - - out.Write("//Pixel Shader for TEV stages\n"); - out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, numTexgen, - bpmem.genMode.numindstages.Value()); + memset(uid_data, 0, sizeof(*uid_data)); uid_data->dstAlphaMode = dstAlphaMode; 
uid_data->genMode_numindstages = bpmem.genMode.numindstages; uid_data->genMode_numtevstages = bpmem.genMode.numtevstages; uid_data->genMode_numtexgens = bpmem.genMode.numtexgens; + uid_data->per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + uid_data->bounding_box = g_ActiveConfig.backend_info.bSupportsBBox && + g_ActiveConfig.bBBoxEnable && BoundingBox::active; + + u32 numStages = uid_data->genMode_numtevstages + 1; + + out.Write("//Pixel Shader for TEV stages\n"); + out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens, + uid_data->genMode_numindstages); // dot product for integer vectors out.Write("int idot(int3 x, int3 y)\n" @@ -249,7 +249,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) "\tfloat4 " I_EFBSCALE ";\n" "};\n"); - if (g_ActiveConfig.bEnablePixelLighting) + if (uid_data->per_pixel_lighting) { out.Write("%s", s_lighting_struct); @@ -266,7 +266,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("};\n"); } - if (g_ActiveConfig.backend_info.bSupportsBBox && g_ActiveConfig.bBBoxEnable) + if (uid_data->bounding_box) { if (ApiType == API_OPENGL) { @@ -284,18 +284,24 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) GenerateVSOutputMembers(out, ApiType, ""); out.Write("};\n"); - const bool forced_early_z = - g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && - (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) - // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. - // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. 
- && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); - const bool per_pixel_depth = - (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || - (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || - (bpmem.zmode.testenable && bpmem.genMode.zfreeze); + { + const bool forced_early_z = + g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && + (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) + // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. + // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. + && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); + const bool per_pixel_depth = + (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || + (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || + (bpmem.zmode.testenable && bpmem.genMode.zfreeze); - if (forced_early_z) + uid_data->per_pixel_depth = per_pixel_depth; + uid_data->forced_early_z = forced_early_z; + uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; + } + + if (uid_data->forced_early_z) { // Zcomploc (aka early_ztest) is a way to control whether depth test is done before // or after texturing and alpha test. 
PC graphics APIs used to provide no way to emulate @@ -341,8 +347,8 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("[earlydepthstencil]\n"); } } - else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || - bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)) + else if (bpmem.UseEarlyDepthTest() && + (uid_data->fast_depth_calc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)) { static bool warn_once = true; if (warn_once) @@ -355,22 +361,22 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) uid_data->msaa = g_ActiveConfig.iMultisamples > 1; uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; + uid_data->stereo = g_ActiveConfig.iStereoMode > 0; if (ApiType == API_OPENGL) { out.Write("out vec4 ocol0;\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) out.Write("out vec4 ocol1;\n"); - if (per_pixel_depth) + if (uid_data->per_pixel_depth) out.Write("#define depth gl_FragDepth\n"); - uid_data->stereo = g_ActiveConfig.iStereoMode > 0; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("in VertexData {\n"); GenerateVSOutputMembers(out, ApiType, GetInterpolationQualifier(true, true)); - if (g_ActiveConfig.iStereoMode > 0) + if (uid_data->stereo) out.Write("\tflat int layer;\n"); out.Write("};\n"); @@ -381,12 +387,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("%s in float4 colors_1;\n", GetInterpolationQualifier()); // compute window position if needed because binding semantic WPOS is not widely supported // Let's set up attributes - for (unsigned int i = 0; i < numTexgen; ++i) + for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) { out.Write("%s in float3 uv%d;\n", GetInterpolationQualifier(), i); } out.Write("%s in float4 clipPos;\n", GetInterpolationQualifier()); - if (g_ActiveConfig.bEnablePixelLighting) + if (uid_data->per_pixel_lighting) { out.Write("%s in float3 
Normal;\n", GetInterpolationQualifier()); out.Write("%s in float3 WorldPos;\n", GetInterpolationQualifier()); @@ -397,7 +403,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - for (unsigned int i = 0; i < numTexgen; ++i) + for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) out.Write("\tfloat3 uv%d = tex%d;\n", i, i); } @@ -409,24 +415,24 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", - per_pixel_depth ? "\n out float depth : SV_Depth," : ""); + uid_data->per_pixel_depth ? "\n out float depth : SV_Depth," : ""); out.Write(" in %s float4 colors_0 : COLOR0,\n", GetInterpolationQualifier()); out.Write(" in %s float4 colors_1 : COLOR1\n", GetInterpolationQualifier()); // compute window position if needed because binding semantic WPOS is not widely supported - for (unsigned int i = 0; i < numTexgen; ++i) + for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) out.Write(",\n in %s float3 uv%d : TEXCOORD%d", GetInterpolationQualifier(), i, i); - out.Write(",\n in %s float4 clipPos : TEXCOORD%d", GetInterpolationQualifier(), numTexgen); - if (g_ActiveConfig.bEnablePixelLighting) + out.Write(",\n in %s float4 clipPos : TEXCOORD%d", GetInterpolationQualifier(), + uid_data->genMode_numtexgens); + if (uid_data->per_pixel_lighting) { out.Write(",\n in %s float3 Normal : TEXCOORD%d", GetInterpolationQualifier(), - numTexgen + 1); + uid_data->genMode_numtexgens + 1); out.Write(",\n in %s float3 WorldPos : TEXCOORD%d", GetInterpolationQualifier(), - numTexgen + 2); + uid_data->genMode_numtexgens + 2); } - uid_data->stereo = g_ActiveConfig.iStereoMode > 0; - if (g_ActiveConfig.iStereoMode > 0) + if (uid_data->stereo) out.Write(",\n in uint layer : 
SV_RenderTargetArrayIndex\n"); out.Write(" ) {\n"); } @@ -448,7 +454,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\tfloat4 col0 = colors_0;\n"); out.Write("\tfloat4 col1 = colors_1;\n"); - if (g_ActiveConfig.bEnablePixelLighting) + if (uid_data->per_pixel_lighting) { out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); out.Write("\tfloat3 pos = WorldPos;\n"); @@ -472,19 +478,19 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } // HACK to handle cases where the tex gen is not enabled - if (numTexgen == 0) + if (uid_data->genMode_numtexgens == 0) { out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n"); } else { - out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + numTexgen - 1); - for (unsigned int i = 0; i < numTexgen; ++i) + out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + uid_data->genMode_numtexgens - 1); + for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) { out.Write("\tint2 fixpoint_uv%d = itrunc(", i); // optional perspective divides uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; - if (xfmem.texMtxInfo[i].projection == XF_TEXPROJ_STQ) + if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) { out.Write("(uv%d.z == 0.0 ? 
uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i); } @@ -499,25 +505,26 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) // indirect texture map lookup int nIndirectStagesUsed = 0; - if (bpmem.genMode.numindstages > 0) + if (uid_data->genMode_numindstages > 0) { for (unsigned int i = 0; i < numStages; ++i) { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; } } uid_data->nIndirectStagesUsed = nIndirectStagesUsed; - for (u32 i = 0; i < bpmem.genMode.numindstages; ++i) + for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) { - if (nIndirectStagesUsed & (1 << i)) + if (uid_data->nIndirectStagesUsed & (1 << i)) { - unsigned int texcoord = bpmem.tevindref.getTexCoord(i); - unsigned int texmap = bpmem.tevindref.getTexMap(i); + uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i)); - uid_data->SetTevindrefValues(i, texcoord, texmap); - if (texcoord < numTexgen) + unsigned int texcoord = uid_data->GetTevindirefCoord(i); + unsigned int texmap = uid_data->GetTevindirefMap(i); + + if (texcoord < uid_data->genMode_numtexgens) { out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2); out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE "[%d].%s;\n", texcoord, i / 2, @@ -527,56 +534,50 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\ttempcoord = int2(0, 0);\n"); out.Write("\tint3 iindtex%d = ", i); - SampleTexture(out, "float2(tempcoord)", "abg", texmap, ApiType); + SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); } } - // Uid fields for BuildSwapModeTable are set in WriteStage - char swapModeTable[4][5]; - const char* swapColors = "rgba"; - for (int i = 0; i < 4; i++) - { - swapModeTable[i][0] = swapColors[bpmem.tevksel[i * 2].swap1]; - 
swapModeTable[i][1] = swapColors[bpmem.tevksel[i * 2].swap2]; - swapModeTable[i][2] = swapColors[bpmem.tevksel[i * 2 + 1].swap1]; - swapModeTable[i][3] = swapColors[bpmem.tevksel[i * 2 + 1].swap2]; - swapModeTable[i][4] = '\0'; - } - for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, uid_data, i, ApiType, swapModeTable); // build the equation for this stage + WriteStage(out, uid_data, i, ApiType); // build the equation for this stage #define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) - bool enable_pl = g_ActiveConfig.bEnablePixelLighting; - uid_data->num_values = - (enable_pl) ? sizeof(*uid_data) : MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); + uid_data->num_values = (uid_data->per_pixel_lighting) ? + sizeof(*uid_data) : + MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); - if (numStages) { // The results of the last texenv stage are put onto the screen, // regardless of the used destination register - if (bpmem.combiners[numStages - 1].colorC.dest != 0) + TevStageCombiner::ColorCombiner last_cc; + TevStageCombiner::AlphaCombiner last_ac; + last_cc.hex = uid_data->stagehash[uid_data->genMode_numtevstages].cc; + last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac; + if (last_cc.dest != 0) { - out.Write("\tprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); + out.Write("\tprev.rgb = %s;\n", tevCOutputTable[last_cc.dest]); } - if (bpmem.combiners[numStages - 1].alphaC.dest != 0) + if (last_ac.dest != 0) { - out.Write("\tprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); + out.Write("\tprev.a = %s;\n", tevAOutputTable[last_ac.dest]); } } out.Write("\tprev = prev & 255;\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); uid_data->Pretest = Pretest; + uid_data->late_ztest = bpmem.UseLateDepthTest(); // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // (in this case we need to write a 
depth value if depth test passes regardless of the alpha // testing result) - if (Pretest == AlphaTest::UNDETERMINED || - (Pretest == AlphaTest::FAIL && bpmem.UseLateDepthTest())) - WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, per_pixel_depth); + if (uid_data->Pretest == AlphaTest::UNDETERMINED || + (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) + WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); - if (bpmem.genMode.zfreeze) + uid_data->zfreeze = bpmem.genMode.zfreeze; + + if (uid_data->zfreeze) { out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE); out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE); @@ -590,7 +591,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE ".y * screenpos.y);\n"); } - else if (!g_ActiveConfig.bFastDepthCalc) + else if (!uid_data->fast_depth_calc) { // FastDepth means to trust the depth generated in perspective division. // It should be correct, but it seems not to be as accurate as required. TODO: Find out why! 
@@ -611,20 +612,16 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); - // depth texture can safely be ignored if the result won't be written to the depth buffer - // (early_ztest) and isn't used for fog either - const bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; - uid_data->ztex_op = bpmem.ztex2.op; - uid_data->per_pixel_depth = per_pixel_depth; - uid_data->forced_early_z = forced_early_z; - uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; uid_data->early_ztest = bpmem.UseEarlyDepthTest(); uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; - uid_data->zfreeze = bpmem.genMode.zfreeze; + + // depth texture can safely be ignored if the result won't be written to the depth buffer + // (early_ztest) and isn't used for fog either + const bool skip_ztexture = !uid_data->per_pixel_depth && !uid_data->fog_fsel; // Note: z-textures are not written to depth buffer if early depth test is used - if (per_pixel_depth && bpmem.UseEarlyDepthTest()) + if (uid_data->per_pixel_depth && uid_data->early_ztest) { if (ApiType == API_D3D) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); @@ -635,17 +632,17 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) // Note: depth texture output is only written to depth buffer if late depth test is used // theoretical final depth value is used for fog calculation, though, so we have to emulate // ztextures anyway - if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) + if (uid_data->ztex_op != ZTEXTURE_DISABLE && !skip_ztexture) { // use the texture input of the last texture stage (textemp), hopefully this has been read and // is in correct format... out.SetConstantsUsed(C_ZBIAS, C_ZBIAS + 1); out.Write("\tzCoord = idot(" I_ZBIAS "[0].xyzw, textemp.xyzw) + " I_ZBIAS "[1].w %s;\n", - (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); + (uid_data->ztex_op == ZTEXTURE_ADD) ? 
"+ zCoord" : ""); out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); } - if (per_pixel_depth && bpmem.UseLateDepthTest()) + if (uid_data->per_pixel_depth && uid_data->late_ztest) { if (ApiType == API_D3D) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); @@ -675,10 +672,8 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\tocol0.a = float(" I_ALPHA ".a) / 255.0;\n"); } - if (g_ActiveConfig.backend_info.bSupportsBBox && g_ActiveConfig.bBBoxEnable && - BoundingBox::active) + if (uid_data->bounding_box) { - uid_data->bounding_box = true; const char* atomic_op = ApiType == API_OPENGL ? "atomic" : "Interlocked"; out.Write("\tif(bbox_data[0] > int(rawpos.x)) %sMin(bbox_data[0], int(rawpos.x));\n" "\tif(bbox_data[1] < int(rawpos.x)) %sMax(bbox_data[1], int(rawpos.x));\n" @@ -693,8 +688,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType, - const char swapModeTable[4][5]) +static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType) { int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; @@ -705,57 +699,57 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write("\n\t// TEV stage %d\n", n); + auto& stage = uid_data->stagehash[n]; + uid_data->stagehash[n].hasindstage = bHasIndStage; uid_data->stagehash[n].tevorders_texcoord = texcoord; - if (bHasIndStage) + if (stage.hasindstage) { - uid_data->stagehash[n].tevind = bpmem.tevind[n].hex & 0x7FFFFF; + uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; + TevStageIndirect tevind; + tevind.hex = stage.tevind; out.Write("\t// indirect op\n"); // perform the indirect op on the incoming regular coordinates using iindtex%d as the offset // coords - if (bpmem.tevind[n].bs != ITBA_OFF) + if (tevind.bs != ITBA_OFF) { const char* 
tevIndAlphaSel[] = {"", "x", "y", "z"}; const char* tevIndAlphaMask[] = {"248", "224", "240", "248"}; // 0b11111000, 0b11100000, 0b11110000, 0b11111000 - out.Write("alphabump = iindtex%d.%s & %s;\n", bpmem.tevind[n].bt, - tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaMask[bpmem.tevind[n].fmt]); + out.Write("alphabump = iindtex%d.%s & %s;\n", tevind.bt, tevIndAlphaSel[tevind.bs], + tevIndAlphaMask[tevind.fmt]); } else { // TODO: Should we reset alphabump to 0 here? } - if (bpmem.tevind[n].mid != 0) + if (tevind.mid != 0) { // format const char* tevIndFmtMask[] = {"255", "31", "15", "7"}; - out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, - tevIndFmtMask[bpmem.tevind[n].fmt]); + out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, tevind.bt, tevIndFmtMask[tevind.fmt]); // bias - TODO: Check if this needs to be this complicated.. const char* tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias const char* tevIndBiasAdd[] = {"-128", "1", "1", "1"}; // indexed by fmt - if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || - bpmem.tevind[n].bias == ITB_U) - out.Write("\tiindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], - tevIndBiasAdd[bpmem.tevind[n].fmt]); - else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || - bpmem.tevind[n].bias == ITB_TU) - out.Write("\tiindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], - tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); - else if (bpmem.tevind[n].bias == ITB_STU) - out.Write("\tiindtevcrd%d.%s += int3(%s, %s, %s);\n", n, - tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], - tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); + if (tevind.bias == ITB_S || tevind.bias == ITB_T || tevind.bias == ITB_U) + out.Write("\tiindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[tevind.bias], + tevIndBiasAdd[tevind.fmt]); + 
else if (tevind.bias == ITB_ST || tevind.bias == ITB_SU || tevind.bias == ITB_TU) + out.Write("\tiindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[tevind.bias], + tevIndBiasAdd[tevind.fmt], tevIndBiasAdd[tevind.fmt]); + else if (tevind.bias == ITB_STU) + out.Write("\tiindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[tevind.bias], + tevIndBiasAdd[tevind.fmt], tevIndBiasAdd[tevind.fmt], tevIndBiasAdd[tevind.fmt]); // multiply by offset matrix and scale - calculations are likely to overflow badly, // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result - if (bpmem.tevind[n].mid <= 3) + if (tevind.mid <= 3) { - int mtxidx = 2 * (bpmem.tevind[n].mid - 1); + int mtxidx = 2 * (tevind.mid - 1); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX @@ -767,27 +761,27 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE mtxidx, n, mtxidx); out.Write("\telse indtevtrans%d <<= (-" I_INDTEXMTX "[%d].w);\n", n, mtxidx); } - else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) + else if (tevind.mid <= 7 && bHasTexCoord) { // s matrix - _assert_(bpmem.tevind[n].mid >= 5); - int mtxidx = 2 * (bpmem.tevind[n].mid - 5); + _assert_(tevind.mid >= 5); + int mtxidx = 2 * (tevind.mid - 5); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, - texcoord, n); + stage.tevorders_texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); out.Write("\telse indtevtrans%d <<= (-" I_INDTEXMTX "[%d].w);\n", n, mtxidx); } - else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) + else if (tevind.mid <= 11 && bHasTexCoord) { // t matrix - _assert_(bpmem.tevind[n].mid >= 9); - int mtxidx = 2 * (bpmem.tevind[n].mid - 9); + _assert_(tevind.mid >= 9); + int mtxidx = 2 * (tevind.mid - 9); 
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, - texcoord, n); + stage.tevorders_texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); @@ -811,24 +805,24 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE "(32<<7)", "(16<<7)", "1"}; // TODO: Should the last one be 1 or (1<<7)? // wrap S - if (bpmem.tevind[n].sw == ITW_OFF) - out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", texcoord); - else if (bpmem.tevind[n].sw == ITW_0) + if (tevind.sw == ITW_OFF) + out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", stage.tevorders_texcoord); + else if (tevind.sw == ITW_0) out.Write("\twrappedcoord.x = 0;\n"); else - out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", texcoord, - tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", stage.tevorders_texcoord, + tevIndWrapStart[tevind.sw]); // wrap T - if (bpmem.tevind[n].tw == ITW_OFF) - out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord); - else if (bpmem.tevind[n].tw == ITW_0) + if (tevind.tw == ITW_OFF) + out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", stage.tevorders_texcoord); + else if (tevind.tw == ITW_0) out.Write("\twrappedcoord.y = 0;\n"); else - out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", texcoord, - tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", stage.tevorders_texcoord, + tevIndWrapStart[tevind.tw]); - if (bpmem.tevind[n].fb_addprev) // add previous tevcoord + if (tevind.fb_addprev) // add previous tevcoord out.Write("\ttevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); else out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); @@ -837,11 +831,14 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write("\ttevcoord.xy = 
(tevcoord.xy << 8) >> 8;\n"); } - TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC; - TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC; + uid_data->stagehash[n].cc = bpmem.combiners[n].colorC.hex & 0xFFFFFF; + uid_data->stagehash[n].ac = + bpmem.combiners[n].alphaC.hex & 0xFFFFF0; // Storing rswap and tswap later - uid_data->stagehash[n].cc = cc.hex & 0xFFFFFF; - uid_data->stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later + TevStageCombiner::ColorCombiner cc; + TevStageCombiner::AlphaCombiner ac; + cc.hex = stage.cc; + ac.hex = stage.ac; if (cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC || @@ -849,21 +846,33 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { const int i = bpmem.combiners[n].alphaC.rswap; + uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap; + ac.rswap = bpmem.combiners[n].alphaC.rswap; uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2; uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); - const char* rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - out.Write("\trastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], - rasswap); + char rasswap[5] = {"rgba"[stage.tevksel_swap1a], "rgba"[stage.tevksel_swap2a], + "rgba"[stage.tevksel_swap1b], "rgba"[stage.tevksel_swap2b], '\0'}; + + out.Write("\trastemp = %s.%s;\n", tevRasTable[stage.tevorders_colorchan], rasswap); } uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); - if 
(bpmem.tevorders[n / 2].getEnable(n & 1)) + if (stage.tevorders_enable) { int texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + const int i = bpmem.combiners[n].alphaC.tswap; + uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2; + ac.tswap = bpmem.combiners[n].alphaC.tswap; + uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; + uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; + uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; + uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; + uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + if (!bHasIndStage) { // calc tevcord @@ -873,17 +882,11 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write("\ttevcoord.xy = int2(0, 0);\n"); } - const int i = bpmem.combiners[n].alphaC.tswap; - uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; - uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; - uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; - uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; + char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c], + "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'}; - uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); - - const char* texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; out.Write("\ttextemp = "); - SampleTexture(out, "float2(tevcoord.xy)", texswap, texmap, ApiType); + SampleTexture(out, "float2(tevcoord.xy)", texswap, texmap, uid_data->stereo, ApiType); } else { @@ -894,16 +897,17 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) { - int kc = bpmem.tevksel[n / 2].getKC(n & 1); - int ka = 
bpmem.tevksel[n / 2].getKA(n & 1); - uid_data->stagehash[n].tevksel_kc = kc; - uid_data->stagehash[n].tevksel_ka = ka; - out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1); + uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1); + out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[stage.tevksel_kc], + tevKSelTableA[stage.tevksel_ka]); - if (kc > 7) - out.SetConstantsUsed(C_KCOLORS + ((kc - 0xc) % 4), C_KCOLORS + ((kc - 0xc) % 4)); - if (ka > 7) - out.SetConstantsUsed(C_KCOLORS + ((ka - 0xc) % 4), C_KCOLORS + ((ka - 0xc) % 4)); + if (stage.tevksel_kc > 7) + out.SetConstantsUsed(C_KCOLORS + ((stage.tevksel_kc - 0xc) % 4), + C_KCOLORS + ((stage.tevksel_kc - 0xc) % 4)); + if (stage.tevksel_ka > 7) + out.SetConstantsUsed(C_KCOLORS + ((stage.tevksel_ka - 0xc) % 4), + C_KCOLORS + ((stage.tevksel_ka - 0xc) % 4)); } if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0) @@ -1042,18 +1046,17 @@ static void WriteTevRegular(T& out, const char* components, int bias, int op, in template static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, - API_TYPE ApiType) + bool stereo, API_TYPE ApiType) { out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap); if (ApiType == API_D3D) out.Write("iround(255.0 * Tex[%d].Sample(samp[%d], float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n", - texmap, texmap, texcoords, texmap, g_ActiveConfig.iStereoMode > 0 ? "layer" : "0.0", - texswap); + texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); else out.Write("iround(255.0 * texture(samp[%d], float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n", - texmap, texcoords, texmap, g_ActiveConfig.iStereoMode > 0 ? "layer" : "0.0", texswap); + texmap, texcoords, texmap, stereo ? 
"layer" : "0.0", texswap); } static const char* tevAlphaFuncsTable[] = { @@ -1092,13 +1095,13 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api uid_data->alpha_test_logic = bpmem.alpha_test.logic; // Lookup the first component from the alpha function table - int compindex = bpmem.alpha_test.comp0; + int compindex = uid_data->alpha_test_comp0; out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); - out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]); // lookup the logic op + out.Write("%s", tevAlphaFunclogicTable[uid_data->alpha_test_logic]); // lookup the logic op // Lookup the second component from the alpha function table - compindex = bpmem.alpha_test.comp1; + compindex = uid_data->alpha_test_comp1; out.Write(tevAlphaFuncsTable[compindex], alphaRef[1]); if (DriverDetails::HasBug(DriverDetails::BUG_BROKENNEGATEDBOOLEAN)) @@ -1148,7 +1151,7 @@ template static void WriteFog(T& out, pixel_shader_uid_data* uid_data) { uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; - if (bpmem.fog.c_proj_fsel.fsel == 0) + if (uid_data->fog_fsel == 0) return; // no Fog uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; @@ -1156,7 +1159,7 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); out.SetConstantsUsed(C_FOGI, C_FOGI); out.SetConstantsUsed(C_FOGF, C_FOGF + 1); - if (bpmem.fog.c_proj_fsel.proj == 0) + if (uid_data->fog_proj == 0) { // perspective // ze = A/(B - (Zs >> B_SHF) @@ -1179,7 +1182,7 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) // TODO Instead of this theoretical calculation, we should use the // coefficient table given in the fog range BP registers! 
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; - if (bpmem.fogRange.Base.Enabled) + if (uid_data->fog_RangeBaseEnabled) { out.SetConstantsUsed(C_FOGF, C_FOGF); out.Write("\tfloat x_adjust = (2.0 * (rawpos.x / " I_FOGF "[0].y)) - 1.0 - " I_FOGF "[0].x;\n"); @@ -1190,14 +1193,14 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) out.Write("\tfloat fog = clamp(ze - " I_FOGF "[1].z, 0.0, 1.0);\n"); - if (bpmem.fog.c_proj_fsel.fsel > 3) + if (uid_data->fog_fsel > 3) { - out.Write("%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); + out.Write("%s", tevFogFuncsTable[uid_data->fog_fsel]); } else { - if (bpmem.fog.c_proj_fsel.fsel != 2) - WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); + if (uid_data->fog_fsel != 2) + WARN_LOG(VIDEO, "Unknown Fog Type! %08x", uid_data->fog_fsel); } out.Write("\tint ifog = iround(fog * 256.0);\n"); diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 2ab23f38b4..ed4299e9de 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -46,11 +46,12 @@ struct pixel_shader_uid_data u32 per_pixel_lighting : 1; u32 forced_early_z : 1; u32 early_ztest : 1; + u32 late_ztest : 1; u32 bounding_box : 1; u32 zfreeze : 1; u32 msaa : 1; u32 ssaa : 1; - u32 pad : 17; + u32 pad : 16; u32 texMtxInfo_n_projection : 8; // 8x1 bit u32 tevindref_bi0 : 3; @@ -86,6 +87,48 @@ struct pixel_shader_uid_data } } + inline u32 GetTevindirefCoord(int index) + { + if (index == 0) + { + return tevindref_bc0; + } + else if (index == 1) + { + return tevindref_bc1; + } + else if (index == 2) + { + return tevindref_bc3; + } + else if (index == 3) + { + return tevindref_bc4; + } + return 0; + } + + inline u32 GetTevindirefMap(int index) + { + if (index == 0) + { + return tevindref_bi0; + } + else if (index == 1) + { + return tevindref_bi1; + } + else if (index == 2) + { + return tevindref_bi2; + } + else if (index == 3) + { + return 
tevindref_bi4; + } + return 0; + } + struct { // TODO: Can save a lot space by removing the padding bits From 4969415f38e2d29666e550aa94608559a3e001a4 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sat, 16 Jan 2016 22:56:49 +1300 Subject: [PATCH 05/12] Remove global references from common code. Bug Fix: Previously vertex shaders and geometry shaders didn't track antialiasing state in their UIDs, which could cause AA bugs on DirectX. --- Source/Core/VideoCommon/GeometryShaderGen.cpp | 53 ++++++++++++------- Source/Core/VideoCommon/GeometryShaderGen.h | 2 + Source/Core/VideoCommon/PixelShaderGen.cpp | 43 ++++++++++----- Source/Core/VideoCommon/ShaderGenCommon.h | 30 ++++++----- Source/Core/VideoCommon/VertexShaderGen.cpp | 34 +++++++----- Source/Core/VideoCommon/VertexShaderGen.h | 5 +- 6 files changed, 106 insertions(+), 61 deletions(-) diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 02744c2889..2179307afd 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -20,6 +20,12 @@ static void EmitVertex(T& out, const char* vertex, API_TYPE ApiType, bool first_ template static void EndPrimitive(T& out, API_TYPE ApiType); +template +static void EmitVertex(T& out, geometry_shader_uid_data* uid_data, const char* vertex, + API_TYPE ApiType, bool first_vertex = false); +template +static void EndPrimitive(T& out, geometry_shader_uid_data* uid_data, API_TYPE ApiType); + template static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) { @@ -40,6 +46,8 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) if (g_ActiveConfig.bWireFrame) vertex_out++; + uid_data->msaa = g_ActiveConfig.iMultisamples > 1; + uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; uid_data->stereo = g_ActiveConfig.iStereoMode > 0; if (ApiType == API_OPENGL) { @@ -77,7 +85,7 @@ static T GenerateGeometryShader(u32 
primitive_type, API_TYPE ApiType) uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, ""); + GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, ""); out.Write("};\n"); if (ApiType == API_OPENGL) @@ -86,11 +94,15 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out.Write("#define InstanceID gl_InvocationID\n"); out.Write("in VertexData {\n"); - GenerateVSOutputMembers(out, ApiType, GetInterpolationQualifier(true, true)); + GenerateVSOutputMembers( + out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); out.Write("} vs[%d];\n", vertex_in); out.Write("out VertexData {\n"); - GenerateVSOutputMembers(out, ApiType, GetInterpolationQualifier(true, false)); + GenerateVSOutputMembers( + out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); if (g_ActiveConfig.iStereoMode > 0) out.Write("\tflat int layer;\n"); @@ -135,8 +147,8 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) if (ApiType == API_OPENGL) { out.Write("\tVS_OUTPUT start, end;\n"); - AssignVSOutputMembers(out, "start", "vs[0]"); - AssignVSOutputMembers(out, "end", "vs[1]"); + AssignVSOutputMembers(out, "start", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting); + AssignVSOutputMembers(out, "end", "vs[1]", uid_data->numTexGens, uid_data->pixel_lighting); } else { @@ -166,11 +178,11 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) if (ApiType == API_OPENGL) { out.Write("\tVS_OUTPUT center;\n"); - AssignVSOutputMembers(out, "center", "vs[0]"); + AssignVSOutputMembers(out, "center", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting); } else { - out.Write("\tVS_OUTPUT center = o[0];\n"); + out.Write("\tVS_OUTPUT center = o[0];\n", uid_data->numTexGens, 
uid_data->pixel_lighting); } // Offset from center to upper right vertex @@ -197,7 +209,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) if (ApiType == API_OPENGL) { out.Write("\tVS_OUTPUT f;\n"); - AssignVSOutputMembers(out, "f", "vs[i]"); + AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, uid_data->pixel_lighting); } else { @@ -239,8 +251,8 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) } out.Write("\t}\n"); - EmitVertex(out, "l", ApiType, true); - EmitVertex(out, "r", ApiType); + EmitVertex(out, uid_data, "l", ApiType, true); + EmitVertex(out, uid_data, "r", ApiType); } else if (primitive_type == PRIMITIVE_POINTS) { @@ -268,19 +280,19 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) } out.Write("\t}\n"); - EmitVertex(out, "ll", ApiType, true); - EmitVertex(out, "lr", ApiType); - EmitVertex(out, "ul", ApiType); - EmitVertex(out, "ur", ApiType); + EmitVertex(out, uid_data, "ll", ApiType, true); + EmitVertex(out, uid_data, "lr", ApiType); + EmitVertex(out, uid_data, "ul", ApiType); + EmitVertex(out, uid_data, "ur", ApiType); } else { - EmitVertex(out, "f", ApiType, true); + EmitVertex(out, uid_data, "f", ApiType, true); } out.Write("\t}\n"); - EndPrimitive(out, ApiType); + EndPrimitive(out, uid_data, ApiType); if (g_ActiveConfig.iStereoMode > 0 && !g_ActiveConfig.backend_info.bSupportsGSInstancing) out.Write("\t}\n"); @@ -291,7 +303,8 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) } template -static void EmitVertex(T& out, const char* vertex, API_TYPE ApiType, bool first_vertex) +static void EmitVertex(T& out, geometry_shader_uid_data* uid_data, const char* vertex, + API_TYPE ApiType, bool first_vertex) { if (g_ActiveConfig.bWireFrame && first_vertex) out.Write("\tif (i == 0) first = %s;\n", vertex); @@ -299,7 +312,7 @@ static void EmitVertex(T& out, const char* vertex, API_TYPE ApiType, bool first_ if (ApiType == API_OPENGL) { 
out.Write("\tgl_Position = %s.pos;\n", vertex); - AssignVSOutputMembers(out, "ps", vertex); + AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting); } else { @@ -312,10 +325,10 @@ static void EmitVertex(T& out, const char* vertex, API_TYPE ApiType, bool first_ out.Write("\toutput.Append(ps);\n"); } template -static void EndPrimitive(T& out, API_TYPE ApiType) +static void EndPrimitive(T& out, geometry_shader_uid_data* uid_data, API_TYPE ApiType) { if (g_ActiveConfig.bWireFrame) - EmitVertex(out, "first", ApiType); + EmitVertex(out, uid_data, "first", ApiType); if (ApiType == API_OPENGL) out.Write("\tEndPrimitive();\n"); diff --git a/Source/Core/VideoCommon/GeometryShaderGen.h b/Source/Core/VideoCommon/GeometryShaderGen.h index 961ab770c6..fcf4f9c8db 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.h +++ b/Source/Core/VideoCommon/GeometryShaderGen.h @@ -23,6 +23,8 @@ struct geometry_shader_uid_data u32 pixel_lighting : 1; u32 primitive_type : 2; u32 wireframe : 1; + u32 msaa : 1; + u32 ssaa : 1; }; #pragma pack() diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 431ebeab4e..2d1df801db 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -281,7 +281,8 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, ""); + GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, + uid_data->per_pixel_lighting, ""); out.Write("};\n"); { @@ -374,7 +375,9 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("in VertexData {\n"); - GenerateVSOutputMembers(out, ApiType, GetInterpolationQualifier(true, true)); + GenerateVSOutputMembers( + out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, + 
GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); if (uid_data->stereo) out.Write("\tflat int layer;\n"); @@ -383,19 +386,25 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } else { - out.Write("%s in float4 colors_0;\n", GetInterpolationQualifier()); - out.Write("%s in float4 colors_1;\n", GetInterpolationQualifier()); + out.Write("%s in float4 colors_0;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); + out.Write("%s in float4 colors_1;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); // compute window position if needed because binding semantic WPOS is not widely supported // Let's set up attributes for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) { - out.Write("%s in float3 uv%d;\n", GetInterpolationQualifier(), i); + out.Write("%s in float3 uv%d;\n", GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa), + i); } - out.Write("%s in float4 clipPos;\n", GetInterpolationQualifier()); + out.Write("%s in float4 clipPos;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); if (uid_data->per_pixel_lighting) { - out.Write("%s in float3 Normal;\n", GetInterpolationQualifier()); - out.Write("%s in float3 WorldPos;\n", GetInterpolationQualifier()); + out.Write("%s in float3 Normal;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); + out.Write("%s in float3 WorldPos;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); } } @@ -417,19 +426,25 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) "", uid_data->per_pixel_depth ? 
"\n out float depth : SV_Depth," : ""); - out.Write(" in %s float4 colors_0 : COLOR0,\n", GetInterpolationQualifier()); - out.Write(" in %s float4 colors_1 : COLOR1\n", GetInterpolationQualifier()); + out.Write(" in %s float4 colors_0 : COLOR0,\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); + out.Write(" in %s float4 colors_1 : COLOR1\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); // compute window position if needed because binding semantic WPOS is not widely supported for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) - out.Write(",\n in %s float3 uv%d : TEXCOORD%d", GetInterpolationQualifier(), i, i); - out.Write(",\n in %s float4 clipPos : TEXCOORD%d", GetInterpolationQualifier(), + out.Write(",\n in %s float3 uv%d : TEXCOORD%d", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa), i, i); + out.Write(",\n in %s float4 clipPos : TEXCOORD%d", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa), uid_data->genMode_numtexgens); if (uid_data->per_pixel_lighting) { - out.Write(",\n in %s float3 Normal : TEXCOORD%d", GetInterpolationQualifier(), + out.Write(",\n in %s float3 Normal : TEXCOORD%d", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa), uid_data->genMode_numtexgens + 1); - out.Write(",\n in %s float3 WorldPos : TEXCOORD%d", GetInterpolationQualifier(), + out.Write(",\n in %s float3 WorldPos : TEXCOORD%d", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa), uid_data->genMode_numtexgens + 2); } if (uid_data->stereo) diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 2668c8ea7d..dbd6ce8823 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -245,40 +245,41 @@ inline void DefineOutputMember(T& object, API_TYPE api_type, const char* qualifi } template -inline void GenerateVSOutputMembers(T& object, API_TYPE api_type, const char* qualifier) +inline void 
GenerateVSOutputMembers(T& object, API_TYPE api_type, u32 texgens, + bool per_pixel_lighting, const char* qualifier) { DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "POSITION"); DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0); DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1); - for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) + for (unsigned int i = 0; i < texgens; ++i) DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, "TEXCOORD", i); - DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, "TEXCOORD", - xfmem.numTexGen.numTexGens); + DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, "TEXCOORD", texgens); - if (g_ActiveConfig.bEnablePixelLighting) + if (per_pixel_lighting) { DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, "TEXCOORD", - xfmem.numTexGen.numTexGens + 1); + texgens + 1); DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD", - xfmem.numTexGen.numTexGens + 2); + texgens + 2); } } template -inline void AssignVSOutputMembers(T& object, const char* a, const char* b) +inline void AssignVSOutputMembers(T& object, const char* a, const char* b, u32 texgens, + bool per_pixel_lighting) { object.Write("\t%s.pos = %s.pos;\n", a, b); object.Write("\t%s.colors_0 = %s.colors_0;\n", a, b); object.Write("\t%s.colors_1 = %s.colors_1;\n", a, b); - for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) + for (unsigned int i = 0; i < texgens; ++i) object.Write("\t%s.tex%d = %s.tex%d;\n", a, i, b, i); object.Write("\t%s.clipPos = %s.clipPos;\n", a, b); - if (g_ActiveConfig.bEnablePixelLighting) + if (per_pixel_lighting) { object.Write("\t%s.Normal = %s.Normal;\n", a, b); object.Write("\t%s.WorldPos = %s.WorldPos;\n", a, b); @@ -293,23 +294,24 @@ inline void AssignVSOutputMembers(T& object, const char* a, const char* b) // As a workaround, we 
interpolate at the centroid of the coveraged pixel, which // is always inside the primitive. // Without MSAA, this flag is defined to have no effect. -inline const char* GetInterpolationQualifier(bool in_glsl_interface_block = false, bool in = false) +inline const char* GetInterpolationQualifier(bool msaa, bool ssaa, + bool in_glsl_interface_block = false, bool in = false) { - if (g_ActiveConfig.iMultisamples <= 1) + if (!msaa) return ""; // Without GL_ARB_shading_language_420pack support, the interpolation qualifier must be // "centroid in" and not "centroid", even within an interface block. if (in_glsl_interface_block && !g_ActiveConfig.backend_info.bSupportsBindingLayout) { - if (!g_ActiveConfig.bSSAA) + if (!ssaa) return in ? "centroid in" : "centroid out"; else return in ? "sample in" : "sample out"; } else { - if (!g_ActiveConfig.bSSAA) + if (!ssaa) return "centroid"; else return "sample"; diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 464ce00a30..f417988a21 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -39,13 +39,15 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write(s_shader_uniforms); out.Write("};\n"); - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, api_type, ""); - out.Write("};\n"); - uid_data->numTexGens = xfmem.numTexGen.numTexGens; uid_data->components = VertexLoaderManager::g_current_components; uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + uid_data->msaa = g_ActiveConfig.iMultisamples > 1; + uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; + + out.Write("struct VS_OUTPUT {\n"); + GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, ""); + out.Write("};\n"); if (api_type == API_OPENGL) { @@ -75,7 +77,9 @@ static T GenerateVertexShader(API_TYPE api_type) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { 
out.Write("out VertexData {\n"); - GenerateVSOutputMembers(out, api_type, GetInterpolationQualifier(true, false)); + GenerateVSOutputMembers( + out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); out.Write("} vs;\n"); } else @@ -85,17 +89,23 @@ static T GenerateVertexShader(API_TYPE api_type) { if (i < uid_data->numTexGens) { - out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i); + out.Write("%s out float3 uv%u;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa), i); } } - out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier()); + out.Write("%s out float4 clipPos;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); if (uid_data->pixel_lighting) { - out.Write("%s out float3 Normal;\n", GetInterpolationQualifier()); - out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier()); + out.Write("%s out float3 Normal;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); + out.Write("%s out float3 WorldPos;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); } - out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier()); - out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier()); + out.Write("%s out float4 colors_0;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); + out.Write("%s out float4 colors_1;\n", + GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa)); } out.Write("void main()\n{\n"); @@ -387,7 +397,7 @@ static T GenerateVertexShader(API_TYPE api_type) { if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - AssignVSOutputMembers(out, "vs", "o"); + AssignVSOutputMembers(out, "vs", "o", uid_data->numTexGens, uid_data->pixel_lighting); } else { diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 959226ea15..16931ec26d 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ 
b/Source/Core/VideoCommon/VertexShaderGen.h @@ -37,10 +37,13 @@ struct vertex_shader_uid_data u32 numColorChans : 2; u32 dualTexTrans_enabled : 1; u32 pixel_lighting : 1; - u32 pad : 1; + u32 msaa : 1; + u32 ssaa : 1; u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is // 8 bits wide + u32 pad : 15; + struct { u32 inputform : 2; From fa02f47729b0caeec455a47dcd032d663a47a645 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sat, 16 Jan 2016 23:20:09 +1300 Subject: [PATCH 06/12] Multithreaded Shadergen: First pass over geometry Shadergen. --- Source/Core/VideoCommon/GeometryShaderGen.cpp | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 2179307afd..6aa6b4951b 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -33,17 +33,16 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) // Non-uid template parameters will write to the dummy data (=> gets optimized out) geometry_shader_uid_data dummy_data; geometry_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data != nullptr) - memset(uid_data, 0, sizeof(*uid_data)); - else + if (uid_data == nullptr) uid_data = &dummy_data; + memset(uid_data, 0, sizeof(*uid_data)); uid_data->primitive_type = primitive_type; const unsigned int vertex_in = primitive_type + 1; unsigned int vertex_out = primitive_type == PRIMITIVE_TRIANGLES ? 
3 : 4; uid_data->wireframe = g_ActiveConfig.bWireFrame; - if (g_ActiveConfig.bWireFrame) + if (uid_data->wireframe) vertex_out++; uid_data->msaa = g_ActiveConfig.iMultisamples > 1; @@ -55,16 +54,16 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) if (g_ActiveConfig.backend_info.bSupportsGSInstancing) { out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[primitive_type], - g_ActiveConfig.iStereoMode > 0 ? 2 : 1); + uid_data->stereo ? 2 : 1); out.Write("layout(%s_strip, max_vertices = %d) out;\n", - g_ActiveConfig.bWireFrame ? "line" : "triangle", vertex_out); + uid_data->wireframe ? "line" : "triangle", vertex_out); } else { out.Write("layout(%s) in;\n", primitives_ogl[primitive_type]); out.Write("layout(%s_strip, max_vertices = %d) out;\n", - g_ActiveConfig.bWireFrame ? "line" : "triangle", - g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out); + uid_data->wireframe ? "line" : "triangle", + uid_data->stereo ? vertex_out * 2 : vertex_out); } } @@ -104,7 +103,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); - if (g_ActiveConfig.iStereoMode > 0) + if (uid_data->stereo) out.Write("\tflat int layer;\n"); out.Write("} ps;\n"); @@ -116,27 +115,25 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out.Write("struct VertexData {\n"); out.Write("\tVS_OUTPUT o;\n"); - if (g_ActiveConfig.iStereoMode > 0) + if (uid_data->stereo) out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n"); out.Write("};\n"); if (g_ActiveConfig.backend_info.bSupportsGSInstancing) { - out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", vertex_out, - g_ActiveConfig.iStereoMode > 0 ? 2 : 1); + out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", vertex_out, uid_data->stereo ? 
2 : 1); out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream output, in uint " "InstanceID : SV_GSInstanceID)\n{\n", primitives_d3d[primitive_type], vertex_in, - g_ActiveConfig.bWireFrame ? "Line" : "Triangle"); + uid_data->wireframe ? "Line" : "Triangle"); } else { - out.Write("[maxvertexcount(%d)]\n", - g_ActiveConfig.iStereoMode > 0 ? vertex_out * 2 : vertex_out); + out.Write("[maxvertexcount(%d)]\n", uid_data->stereo ? vertex_out * 2 : vertex_out); out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream output)\n{\n", primitives_d3d[primitive_type], vertex_in, - g_ActiveConfig.bWireFrame ? "Line" : "Triangle"); + uid_data->wireframe ? "Line" : "Triangle"); } out.Write("\tVertexData ps;\n"); @@ -191,7 +188,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) ".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n"); } - if (g_ActiveConfig.iStereoMode > 0) + if (uid_data->stereo) { // If the GPU supports invocation we don't need a for loop and can simply use the // invocation identifier to determine which layer we're rendering. 
@@ -201,7 +198,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out.Write("\tfor (int eye = 0; eye < 2; ++eye) {\n"); } - if (g_ActiveConfig.bWireFrame) + if (uid_data->wireframe) out.Write("\tVS_OUTPUT first;\n"); out.Write("\tfor (int i = 0; i < %d; ++i) {\n", vertex_in); @@ -216,7 +213,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out.Write("\tVS_OUTPUT f = o[i];\n"); } - if (g_ActiveConfig.iStereoMode > 0) + if (uid_data->stereo) { // Select the output layer out.Write("\tps.layer = eye;\n"); @@ -244,7 +241,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out.Write("\tif (" I_TEXOFFSET "[2] != 0) {\n"); out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); - for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) + for (unsigned int i = 0; i < uid_data->numTexGens; ++i) { out.Write("\tif (((" I_TEXOFFSET "[0] >> %d) & 0x1) != 0)\n", i); out.Write("\t\tr.tex%d.x += texOffset;\n", i); @@ -270,7 +267,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET "[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n"); - for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) + for (unsigned int i = 0; i < uid_data->numTexGens; ++i) { out.Write("\tif (((" I_TEXOFFSET "[1] >> %d) & 0x1) != 0) {\n", i); out.Write("\t\tll.tex%d.xy += float2(0,1) * texOffset;\n", i); @@ -294,7 +291,7 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) EndPrimitive(out, uid_data, ApiType); - if (g_ActiveConfig.iStereoMode > 0 && !g_ActiveConfig.backend_info.bSupportsGSInstancing) + if (uid_data->stereo && !g_ActiveConfig.backend_info.bSupportsGSInstancing) out.Write("\t}\n"); out.Write("}\n"); @@ -306,7 +303,7 @@ template static void EmitVertex(T& out, geometry_shader_uid_data* uid_data, const char* vertex, API_TYPE ApiType, bool first_vertex) { - if (g_ActiveConfig.bWireFrame && first_vertex) + 
if (uid_data->wireframe && first_vertex) out.Write("\tif (i == 0) first = %s;\n", vertex); if (ApiType == API_OPENGL) @@ -327,7 +324,7 @@ static void EmitVertex(T& out, geometry_shader_uid_data* uid_data, const char* v template static void EndPrimitive(T& out, geometry_shader_uid_data* uid_data, API_TYPE ApiType) { - if (g_ActiveConfig.bWireFrame) + if (uid_data->wireframe) EmitVertex(out, uid_data, "first", ApiType); if (ApiType == API_OPENGL) From 28c7113e41a70e278e033094d3088e58eeb00c88 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sat, 16 Jan 2016 23:49:39 +1300 Subject: [PATCH 07/12] Multithreaded Shadergen: Second Pass over geometry Shadergen --- .../VideoBackends/D3D/GeometryShaderCache.cpp | 6 +- .../Core/VideoBackends/D3D12/ShaderCache.cpp | 7 +- .../Core/VideoBackends/Null/ShaderCache.cpp | 2 +- Source/Core/VideoBackends/Null/ShaderCache.h | 18 ++-- .../VideoBackends/OGL/ProgramShaderCache.cpp | 7 +- Source/Core/VideoCommon/GeometryShaderGen.cpp | 95 +++++++++---------- Source/Core/VideoCommon/GeometryShaderGen.h | 5 +- 7 files changed, 69 insertions(+), 71 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp index f899f7f078..72a9c6bb4c 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp @@ -196,10 +196,10 @@ void GeometryShaderCache::Shutdown() bool GeometryShaderCache::SetShader(u32 primitive_type) { - GeometryShaderUid uid = GetGeometryShaderUid(primitive_type, API_D3D); + GeometryShaderUid uid = GetGeometryShaderUid(primitive_type); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D); + ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D, uid.GetUidData()); geometry_uid_checker.AddToIndexAndCheck(code, uid, "Geometry", "g"); } @@ -235,7 +235,7 @@ bool GeometryShaderCache::SetShader(u32 primitive_type) } // 
Need to compile a new shader - ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D); + ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D, uid.GetUidData()); D3DBlob* pbytecode; if (!D3D::CompileGeometryShader(code.GetBuffer(), &pbytecode)) diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 2f93956003..677d50f426 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -162,7 +162,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g { SetCurrentPrimitiveTopology(gs_primitive_type); - GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type, API_D3D); + GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type); PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); VertexShaderUid vs_uid = GetVertexShaderUid(API_D3D); @@ -219,7 +219,7 @@ void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_t if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GenerateGeometryShaderCode(gs_primitive_type, API_D3D); + ShaderCode code = GenerateGeometryShaderCode(gs_primitive_type, API_D3D, gs_uid.GetUidData()); s_geometry_uid_checker.AddToIndexAndCheck(code, gs_uid, "Geometry", "g"); } @@ -236,7 +236,8 @@ void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_t } else { - ShaderCode gs_code = GenerateGeometryShaderCode(gs_primitive_type, API_D3D); + ShaderCode gs_code = + GenerateGeometryShaderCode(gs_primitive_type, API_D3D, gs_uid.GetUidData()); ID3DBlob* gs_bytecode = nullptr; if (!D3D::CompileGeometryShader(gs_code.GetBuffer(), &gs_bytecode)) diff --git a/Source/Core/VideoBackends/Null/ShaderCache.cpp b/Source/Core/VideoBackends/Null/ShaderCache.cpp index e0fa000765..19dcc778d4 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.cpp +++ b/Source/Core/VideoBackends/Null/ShaderCache.cpp @@ 
-59,7 +59,7 @@ bool ShaderCache::SetShader(DSTALPHA_MODE dst_alpha_mode, u32 primitive_typ } // Need to compile a new shader - ShaderCode code = GenerateCode(dst_alpha_mode, primitive_type, API_OPENGL); + ShaderCode code = GenerateCode(dst_alpha_mode, primitive_type, API_OPENGL, uid); m_shaders.emplace(uid, code.GetBuffer()); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index 833c8acf36..3ef3479ed2 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -27,7 +27,7 @@ public: protected: virtual Uid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type) = 0; virtual ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, - API_TYPE api_type) = 0; + API_TYPE api_type, Uid uid) = 0; private: std::map m_shaders; @@ -47,8 +47,8 @@ protected: { return GetVertexShaderUid(api_type); } - ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, - API_TYPE api_type) override + ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, + VertexShaderUid uid) override { return GenerateVertexShaderCode(api_type); } @@ -63,12 +63,12 @@ protected: GeometryShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type) override { - return GetGeometryShaderUid(primitive_type, api_type); + return GetGeometryShaderUid(primitive_type); } - ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, - API_TYPE api_type) override + ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, + GeometryShaderUid uid) override { - return GenerateGeometryShaderCode(primitive_type, api_type); + return GenerateGeometryShaderCode(primitive_type, api_type, uid.GetUidData()); } }; @@ -83,8 +83,8 @@ protected: { return GetPixelShaderUid(dst_alpha_mode, api_type); } - 
ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, - API_TYPE api_type) override + ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, + PixelShaderUid) override { return GeneratePixelShaderCode(dst_alpha_mode, api_type); } diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 9ec9684224..4f1a5deb11 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -214,7 +214,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) - gcode = GenerateGeometryShaderCode(primitive_type, API_OPENGL); + gcode = GenerateGeometryShaderCode(primitive_type, API_OPENGL, uid.guid.GetUidData()); if (g_ActiveConfig.bEnableShaderDebugging) { @@ -399,7 +399,7 @@ void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, { uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL); uid->vuid = GetVertexShaderUid(API_OPENGL); - uid->guid = GetGeometryShaderUid(primitive_type, API_OPENGL); + uid->guid = GetGeometryShaderUid(primitive_type); if (g_ActiveConfig.bEnableShaderDebugging) { @@ -409,7 +409,8 @@ void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL); vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v"); - ShaderCode gcode = GenerateGeometryShaderCode(primitive_type, API_OPENGL); + ShaderCode gcode = + GenerateGeometryShaderCode(primitive_type, API_OPENGL, uid->guid.GetUidData()); geometry_uid_checker.AddToIndexAndCheck(gcode, uid->guid, "Geometry", "g"); } } diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 6aa6b4951b..f509072f8e 100644 --- 
a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -20,34 +20,40 @@ static void EmitVertex(T& out, const char* vertex, API_TYPE ApiType, bool first_ template static void EndPrimitive(T& out, API_TYPE ApiType); -template -static void EmitVertex(T& out, geometry_shader_uid_data* uid_data, const char* vertex, - API_TYPE ApiType, bool first_vertex = false); -template -static void EndPrimitive(T& out, geometry_shader_uid_data* uid_data, API_TYPE ApiType); - -template -static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) +GeometryShaderUid GetGeometryShaderUid(u32 primitive_type) { - T out; - // Non-uid template parameters will write to the dummy data (=> gets optimized out) - geometry_shader_uid_data dummy_data; - geometry_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data == nullptr) - uid_data = &dummy_data; - memset(uid_data, 0, sizeof(*uid_data)); + ShaderUid out; + geometry_shader_uid_data* uid_data = out.GetUidData(); + memset(uid_data, 0, sizeof(geometry_shader_uid_data)); uid_data->primitive_type = primitive_type; - const unsigned int vertex_in = primitive_type + 1; - unsigned int vertex_out = primitive_type == PRIMITIVE_TRIANGLES ? 
3 : 4; - uid_data->wireframe = g_ActiveConfig.bWireFrame; - if (uid_data->wireframe) - vertex_out++; - uid_data->msaa = g_ActiveConfig.iMultisamples > 1; uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; uid_data->stereo = g_ActiveConfig.iStereoMode > 0; + uid_data->numTexGens = xfmem.numTexGen.numTexGens; + uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + + return out; +} + +static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data, + const char* vertex, API_TYPE ApiType, bool first_vertex = false); +static void EndPrimitive(ShaderCode& out, const geometry_shader_uid_data* uid_data, + API_TYPE ApiType); + +ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, + const geometry_shader_uid_data* uid_data) +{ + ShaderCode out; + // Non-uid template parameters will write to the dummy data (=> gets optimized out) + + const unsigned int vertex_in = primitive_type + 1; + unsigned int vertex_out = primitive_type == PRIMITIVE_TRIANGLES ? 
3 : 4; + + if (uid_data->wireframe) + vertex_out++; + if (ApiType == API_OPENGL) { // Insert layout parameters @@ -80,11 +86,10 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) "\tint4 " I_TEXOFFSET ";\n" "};\n"); - uid_data->numTexGens = xfmem.numTexGen.numTexGens; - uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, ""); + GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, + ""); out.Write("};\n"); if (ApiType == API_OPENGL) @@ -93,13 +98,13 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) out.Write("#define InstanceID gl_InvocationID\n"); out.Write("in VertexData {\n"); - GenerateVSOutputMembers( + GenerateVSOutputMembers( out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); out.Write("} vs[%d];\n", vertex_in); out.Write("out VertexData {\n"); - GenerateVSOutputMembers( + GenerateVSOutputMembers( out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); @@ -248,8 +253,8 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) } out.Write("\t}\n"); - EmitVertex(out, uid_data, "l", ApiType, true); - EmitVertex(out, uid_data, "r", ApiType); + EmitVertex(out, uid_data, "l", ApiType, true); + EmitVertex(out, uid_data, "r", ApiType); } else if (primitive_type == PRIMITIVE_POINTS) { @@ -277,19 +282,19 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) } out.Write("\t}\n"); - EmitVertex(out, uid_data, "ll", ApiType, true); - EmitVertex(out, uid_data, "lr", ApiType); - EmitVertex(out, uid_data, "ul", ApiType); - EmitVertex(out, uid_data, "ur", ApiType); + EmitVertex(out, uid_data, "ll", ApiType, true); + EmitVertex(out, uid_data, "lr", ApiType); + EmitVertex(out, 
uid_data, "ul", ApiType); + EmitVertex(out, uid_data, "ur", ApiType); } else { - EmitVertex(out, uid_data, "f", ApiType, true); + EmitVertex(out, uid_data, "f", ApiType, true); } out.Write("\t}\n"); - EndPrimitive(out, uid_data, ApiType); + EndPrimitive(out, uid_data, ApiType); if (uid_data->stereo && !g_ActiveConfig.backend_info.bSupportsGSInstancing) out.Write("\t}\n"); @@ -299,9 +304,8 @@ static T GenerateGeometryShader(u32 primitive_type, API_TYPE ApiType) return out; } -template -static void EmitVertex(T& out, geometry_shader_uid_data* uid_data, const char* vertex, - API_TYPE ApiType, bool first_vertex) +static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data, + const char* vertex, API_TYPE ApiType, bool first_vertex) { if (uid_data->wireframe && first_vertex) out.Write("\tif (i == 0) first = %s;\n", vertex); @@ -321,24 +325,15 @@ static void EmitVertex(T& out, geometry_shader_uid_data* uid_data, const char* v else out.Write("\toutput.Append(ps);\n"); } -template -static void EndPrimitive(T& out, geometry_shader_uid_data* uid_data, API_TYPE ApiType) + +static void EndPrimitive(ShaderCode& out, const geometry_shader_uid_data* uid_data, + API_TYPE ApiType) { if (uid_data->wireframe) - EmitVertex(out, uid_data, "first", ApiType); + EmitVertex(out, uid_data, "first", ApiType); if (ApiType == API_OPENGL) out.Write("\tEndPrimitive();\n"); else out.Write("\toutput.RestartStrip();\n"); } - -GeometryShaderUid GetGeometryShaderUid(u32 primitive_type, API_TYPE ApiType) -{ - return GenerateGeometryShader(primitive_type, ApiType); -} - -ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType) -{ - return GenerateGeometryShader(primitive_type, ApiType); -} diff --git a/Source/Core/VideoCommon/GeometryShaderGen.h b/Source/Core/VideoCommon/GeometryShaderGen.h index fcf4f9c8db..0d10078836 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.h +++ b/Source/Core/VideoCommon/GeometryShaderGen.h @@ -31,5 +31,6 @@ struct 
geometry_shader_uid_data typedef ShaderUid GeometryShaderUid; -ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType); -GeometryShaderUid GetGeometryShaderUid(u32 primitive_type, API_TYPE ApiType); +ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, + const geometry_shader_uid_data* uid_data); +GeometryShaderUid GetGeometryShaderUid(u32 primitive_type); From 1a831cfc7d4c43f418ae4d65b579bf430aeb278d Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 17 Jan 2016 00:34:06 +1300 Subject: [PATCH 08/12] Multithreadded Shadergen: Second Pass over vertex/lighting Shadergens As much as possible, the asserts have been moved out of the GetUID function. But there are some places where asserts depend on variables that aren't stored in the shader UID. --- .../VideoBackends/D3D/VertexShaderCache.cpp | 6 +- .../Core/VideoBackends/D3D12/ShaderCache.cpp | 6 +- Source/Core/VideoBackends/Null/ShaderCache.h | 4 +- .../VideoBackends/OGL/ProgramShaderCache.cpp | 6 +- Source/Core/VideoCommon/LightingShaderGen.h | 55 ++++++---- Source/Core/VideoCommon/PixelShaderGen.cpp | 6 +- Source/Core/VideoCommon/VertexShaderGen.cpp | 103 +++++++++++------- Source/Core/VideoCommon/VertexShaderGen.h | 4 +- 8 files changed, 114 insertions(+), 76 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp index 7b3f71da26..30367724de 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp @@ -199,10 +199,10 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader() { - VertexShaderUid uid = GetVertexShaderUid(API_D3D); + VertexShaderUid uid = GetVertexShaderUid(); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GenerateVertexShaderCode(API_D3D); + ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData()); vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v"); } @@ 
-227,7 +227,7 @@ bool VertexShaderCache::SetShader() return (entry.shader != nullptr); } - ShaderCode code = GenerateVertexShaderCode(API_D3D); + ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData()); D3DBlob* pbytecode = nullptr; D3D::CompileVertexShader(code.GetBuffer(), &pbytecode); diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 677d50f426..5c4cf17962 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -164,7 +164,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type); PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); - VertexShaderUid vs_uid = GetVertexShaderUid(API_D3D); + VertexShaderUid vs_uid = GetVertexShaderUid(); bool gs_changed = gs_uid != s_last_geometry_shader_uid; bool ps_changed = ps_uid != s_last_pixel_shader_uid; @@ -304,7 +304,7 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid) if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GenerateVertexShaderCode(API_D3D); + ShaderCode code = GenerateVertexShaderCode(API_D3D, vs_uid.GetUidData()); s_vertex_uid_checker.AddToIndexAndCheck(code, vs_uid, "Vertex", "v"); } @@ -316,7 +316,7 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid) } else { - ShaderCode vs_code = GenerateVertexShaderCode(API_D3D); + ShaderCode vs_code = GenerateVertexShaderCode(API_D3D, vs_uid.GetUidData()); ID3DBlob* vs_bytecode = nullptr; if (!D3D::CompileVertexShader(vs_code.GetBuffer(), &vs_bytecode)) diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index 3ef3479ed2..9cdc57c892 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -45,12 +45,12 @@ protected: VertexShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 
primitive_type, API_TYPE api_type) override { - return GetVertexShaderUid(api_type); + return GetVertexShaderUid(); } ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, VertexShaderUid uid) override { - return GenerateVertexShaderCode(api_type); + return GenerateVertexShaderCode(api_type, uid.GetUidData()); } }; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 4f1a5deb11..5d9da051bb 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -209,7 +209,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ last_entry = &newentry; newentry.in_cache = 0; - ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL); + ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData()); ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && @@ -398,7 +398,7 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type) { uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL); - uid->vuid = GetVertexShaderUid(API_OPENGL); + uid->vuid = GetVertexShaderUid(); uid->guid = GetGeometryShaderUid(primitive_type); if (g_ActiveConfig.bEnableShaderDebugging) @@ -406,7 +406,7 @@ void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); - ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL); + ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData()); vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v"); ShaderCode gcode = diff --git 
a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 7603d72dde..8cabb7fc57 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -46,9 +46,8 @@ static const char s_lighting_struct[] = "struct Light {\n" "\tfloat4 dir;\n" "};\n"; -template -static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, - bool alpha) +static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_data, int index, + int litchan_index, bool alpha) { const char* swizzle = alpha ? "a" : "rgb"; const char* swizzle_components = (alpha) ? "" : "3"; @@ -114,18 +113,13 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, // materials name is I_MATERIALS in vs and I_PMATERIALS in ps // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps -template -static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, - const char* inColorName, const char* dest) +static void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data, + int components, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++) { - const LitChannel& color = xfmem.color[j]; - const LitChannel& alpha = xfmem.alpha[j]; - object.Write("{\n"); - uid_data.matsource |= xfmem.color[j].matsource << j; bool colormatsource = !!(uid_data.matsource & (1 << j)); if (colormatsource) // from vertex { @@ -141,10 +135,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com object.Write("int4 mat = %s[%d];\n", I_MATERIALS, j + 2); } - uid_data.enablelighting |= xfmem.color[j].enablelighting << j; if (uid_data.enablelighting & (1 << j)) { - uid_data.ambsource |= xfmem.color[j].ambsource << j; if (uid_data.ambsource & (1 << j)) // from vertex { if (components & (VB_HAS_COL0 << j)) @@ -168,7 +160,6 
@@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } // check if alpha is different - uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2); bool alphamatsource = !!(uid_data.matsource & (1 << (j + 2))); if (alphamatsource != colormatsource) { @@ -187,10 +178,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } } - uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2); if (uid_data.enablelighting & (1 << (j + 2))) { - uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2); if (uid_data.ambsource & (1 << (j + 2))) // from vertex { if (components & (VB_HAS_COL0 << j)) @@ -213,24 +202,44 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com if (uid_data.enablelighting & (1 << j)) // Color lights { - uid_data.attnfunc |= color.attnfunc << (2 * j); - uid_data.diffusefunc |= color.diffusefunc << (2 * j); - uid_data.light_mask |= color.GetFullLightMask() << (8 * j); for (int i = 0; i < 8; ++i) if (uid_data.light_mask & (1 << (i + 8 * j))) - GenerateLightShader(object, uid_data, i, j, false); + GenerateLightShader(object, uid_data, i, j, false); } if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights { - uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2)); - uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2)); - uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2)); for (int i = 0; i < 8; ++i) if (uid_data.light_mask & (1 << (i + 8 * (j + 2)))) - GenerateLightShader(object, uid_data, i, j + 2, true); + GenerateLightShader(object, uid_data, i, j + 2, true); } object.Write("lacc = clamp(lacc, 0, 255);\n"); object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); object.Write("}\n"); } } + +static void GetLightingShaderUid(LightingUidData& uid_data) +{ + for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++) + { + uid_data.matsource |= xfmem.color[j].matsource << j; + uid_data.matsource |= 
xfmem.alpha[j].matsource << (j + 2); + uid_data.enablelighting |= xfmem.color[j].enablelighting << j; + uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2); + + if (uid_data.enablelighting & (1 << j)) // Color lights + { + uid_data.ambsource |= xfmem.color[j].ambsource << j; + uid_data.attnfunc |= xfmem.color[j].attnfunc << (2 * j); + uid_data.diffusefunc |= xfmem.color[j].diffusefunc << (2 * j); + uid_data.light_mask |= xfmem.color[j].GetFullLightMask() << (8 * j); + } + if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights + { + uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2); + uid_data.attnfunc |= xfmem.alpha[j].attnfunc << (2 * (j + 2)); + uid_data.diffusefunc |= xfmem.alpha[j].diffusefunc << (2 * (j + 2)); + uid_data.light_mask |= xfmem.alpha[j].GetFullLightMask() << (8 * (j + 2)); + } + } +} diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 2d1df801db..006fa7584a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -488,8 +488,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, - "colors_", "col"); + + // FIXME: Disabled until pixelshadergen is split + // GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, + // "colors_", "col"); } // HACK to handle cases where the tex gen is not enabled diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index f417988a21..77444e7dfc 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -14,20 
+14,73 @@ #include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VideoConfig.h" -template -static T GenerateVertexShader(API_TYPE api_type) +VertexShaderUid GetVertexShaderUid() { - T out; - // Non-uid template parameters will write to the dummy data (=> gets optimized out) - vertex_shader_uid_data dummy_data; - vertex_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data == nullptr) - uid_data = &dummy_data; + VertexShaderUid out; + vertex_shader_uid_data* uid_data = out.GetUidData(); memset(uid_data, 0, sizeof(*uid_data)); _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); + uid_data->numTexGens = xfmem.numTexGen.numTexGens; + uid_data->components = VertexLoaderManager::g_current_components; + uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + uid_data->msaa = g_ActiveConfig.iMultisamples > 1; + uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; + uid_data->numColorChans = xfmem.numChan.numColorChans; + + GetLightingShaderUid(uid_data->lighting); + + // transform texcoords + for (unsigned int i = 0; i < uid_data->numTexGens; ++i) + { + auto& texinfo = uid_data->texMtxInfo[i]; + + texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow; + texinfo.texgentype = xfmem.texMtxInfo[i].texgentype; + texinfo.inputform = xfmem.texMtxInfo[i].inputform; + + // first transformation + switch (texinfo.texgentype) + { + case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map + if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) + { + // transform the light dir into tangent space + texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift; + texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; + } + else + { + texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; + } + break; + case XF_TEXGEN_COLOR_STRGBC0: + case XF_TEXGEN_COLOR_STRGBC1: + break; + case XF_TEXGEN_REGULAR: + default: + 
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; + break; + } + + uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; + // CHECKME: does this only work for regular tex gen types? + if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) + { + auto& postInfo = uid_data->postMtxInfo[i]; + postInfo.index = xfmem.postMtxInfo[i].index; + postInfo.normalize = xfmem.postMtxInfo[i].normalize; + } + } + + return out; +} + +ShaderCode GenerateVertexShaderCode(API_TYPE api_type, const vertex_shader_uid_data* uid_data) +{ + ShaderCode out; out.Write("%s", s_lighting_struct); // uniforms @@ -39,14 +92,8 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write(s_shader_uniforms); out.Write("};\n"); - uid_data->numTexGens = xfmem.numTexGen.numTexGens; - uid_data->components = VertexLoaderManager::g_current_components; - uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; - uid_data->msaa = g_ActiveConfig.iMultisamples > 1; - uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, ""); + GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, ""); out.Write("};\n"); if (api_type == API_OPENGL) @@ -77,7 +124,7 @@ static T GenerateVertexShader(API_TYPE api_type) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("out VertexData {\n"); - GenerateVSOutputMembers( + GenerateVSOutputMembers( out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); out.Write("} vs;\n"); @@ -190,7 +237,6 @@ static T GenerateVertexShader(API_TYPE api_type) "float3 ldir, h, cosAttn, distAttn;\n" "float dist, dist2, attn;\n"); - uid_data->numColorChans = xfmem.numChan.numColorChans; if (uid_data->numColorChans == 0) { if (uid_data->components & VB_HAS_COL0) @@ -199,7 
+245,7 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); } - GenerateLightingShader(out, uid_data->lighting, uid_data->components, "color", "o.colors_"); + GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, "color", "o.colors_"); if (uid_data->numColorChans < 2) { @@ -217,8 +263,6 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("{\n"); out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); - texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow; - texinfo.texgentype = xfmem.texMtxInfo[i].texgentype; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: @@ -254,7 +298,7 @@ static T GenerateVertexShader(API_TYPE api_type) break; } // Input form of AB11 sets z element to 1.0 - uid_data->texMtxInfo[i].inputform = xfmem.texMtxInfo[i].inputform; + if (texinfo.inputform == XF_TEXINPUT_AB11) out.Write("coord.z = 1.0;\n"); @@ -266,8 +310,6 @@ static T GenerateVertexShader(API_TYPE api_type) if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) { // transform the light dir into tangent space - texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift; - texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift)); out.Write( @@ -279,7 +321,6 @@ static T GenerateVertexShader(API_TYPE api_type) // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue // Squadron 2 //_assert_(0); // should have normals - texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } @@ -292,7 +333,6 @@ static T GenerateVertexShader(API_TYPE api_type) break; case XF_TEXGEN_REGULAR: default: - uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; if (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) { out.Write("int tmp = int(tex%d.z);\n", i); @@ -321,19 +361,16 @@ 
static T GenerateVertexShader(API_TYPE api_type) break; } - uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; // CHECKME: does this only work for regular tex gen types? if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) { auto& postInfo = uid_data->postMtxInfo[i]; - postInfo.index = xfmem.postMtxInfo[i].index; out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n", postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f); - postInfo.normalize = xfmem.postMtxInfo[i].normalize; if (postInfo.normalize) out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); @@ -425,13 +462,3 @@ static T GenerateVertexShader(API_TYPE api_type) return out; } - -VertexShaderUid GetVertexShaderUid(API_TYPE api_type) -{ - return GenerateVertexShader(api_type); -} - -ShaderCode GenerateVertexShaderCode(API_TYPE api_type) -{ - return GenerateVertexShader(api_type); -} diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 16931ec26d..27ca46e19d 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -66,5 +66,5 @@ struct vertex_shader_uid_data typedef ShaderUid VertexShaderUid; -VertexShaderUid GetVertexShaderUid(API_TYPE api_type); -ShaderCode GenerateVertexShaderCode(API_TYPE api_type); +VertexShaderUid GetVertexShaderUid(); +ShaderCode GenerateVertexShaderCode(API_TYPE api_type, const vertex_shader_uid_data* uid_data); From 24e5d21780c6ece01ef8a2315eb9ed5300535688 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 17 Jan 2016 01:41:26 +1300 Subject: [PATCH 09/12] Multithreadded Shadergen: Second pass over Pixel Shadergen. Note: It's not 100% perfect, as some of the GPU capabilities leak into the pixel shader UID. Currently our UIDs don't get exported, so there is no issue.
But someone might want to fix this in the future. --- .../VideoBackends/D3D/PixelShaderCache.cpp | 6 +- .../Core/VideoBackends/D3D12/ShaderCache.cpp | 6 +- Source/Core/VideoBackends/Null/ShaderCache.h | 6 +- .../VideoBackends/OGL/ProgramShaderCache.cpp | 6 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 393 ++++++++++-------- Source/Core/VideoCommon/PixelShaderGen.h | 9 +- 6 files changed, 238 insertions(+), 188 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 3d48784647..e739c6ae06 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -556,10 +556,10 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) { - PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode, API_D3D); + PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData()); pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); } @@ -588,7 +588,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) } // Need to compile a new shader - ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData()); D3DBlob* pbytecode; if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode)) diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 5c4cf17962..5fe90bd410 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -163,7 +163,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g SetCurrentPrimitiveTopology(gs_primitive_type); GeometryShaderUid gs_uid = 
GetGeometryShaderUid(gs_primitive_type); - PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); + PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode); VertexShaderUid vs_uid = GetVertexShaderUid(); bool gs_changed = gs_uid != s_last_geometry_shader_uid; @@ -263,7 +263,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData()); s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p"); } @@ -275,7 +275,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ } else { - ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData()); ID3DBlob* ps_bytecode = nullptr; if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode)) diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index 9cdc57c892..7cd3e2863e 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -81,12 +81,12 @@ protected: PixelShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type) override { - return GetPixelShaderUid(dst_alpha_mode, api_type); + return GetPixelShaderUid(dst_alpha_mode); } ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, - PixelShaderUid) override + PixelShaderUid uid) override { - return GeneratePixelShaderCode(dst_alpha_mode, api_type); + return GeneratePixelShaderCode(dst_alpha_mode, api_type, uid.GetUidData()); } }; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 5d9da051bb..2a8accd5b9 100644 --- 
a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -210,7 +210,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ newentry.in_cache = 0; ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData()); - ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); + ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid.puid.GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) @@ -397,13 +397,13 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type) { - uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL); + uid->puid = GetPixelShaderUid(dstAlphaMode); uid->vuid = GetVertexShaderUid(); uid->guid = GetGeometryShaderUid(primitive_type); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); + ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid->puid.GetUidData()); pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData()); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 006fa7584a..a94cd02c15 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -151,28 +151,14 @@ static const char* tevRasTable[] = { static const char* tevCOutputTable[] = {"prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb"}; static const char* tevAOutputTable[] = {"prev.a", "c0.a", "c1.a", "c2.a"}; -template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType); -template -static void WriteTevRegular(T& out, const char* components, int bias, 
int op, int clamp, int shift); -template -static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, - bool stereo, API_TYPE ApiType); -template -static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType, - DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -template -static void WriteFog(T& out, pixel_shader_uid_data* uid_data); - -template -static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) +// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support) +// leak +// into this UID; This is really unhelpful if these UIDs ever move from one machine to +// another. +PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode) { - T out; - // Non-uid template parameters will write to the dummy data (=> gets optimized out) - pixel_shader_uid_data dummy_data; - pixel_shader_uid_data* uid_data = out.template GetUidData(); - if (uid_data == nullptr) - uid_data = &dummy_data; + PixelShaderUid out; + pixel_shader_uid_data* uid_data = out.GetUidData(); memset(uid_data, 0, sizeof(*uid_data)); uid_data->dstAlphaMode = dstAlphaMode; @@ -185,6 +171,186 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) u32 numStages = uid_data->genMode_numtevstages + 1; + const bool forced_early_z = + g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && + (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) + // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. + // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. 
+ && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); + const bool per_pixel_depth = + (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || + (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || + (bpmem.zmode.testenable && bpmem.genMode.zfreeze); + + uid_data->per_pixel_depth = per_pixel_depth; + uid_data->forced_early_z = forced_early_z; + uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; + uid_data->msaa = g_ActiveConfig.iMultisamples > 1; + uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; + uid_data->stereo = g_ActiveConfig.iStereoMode > 0; + + if (!uid_data->forced_early_z && bpmem.UseEarlyDepthTest() && + (!uid_data->fast_depth_calc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)) + { + static bool warn_once = true; + if (warn_once) + WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current " + "configuration. Make sure to enable fast depth calculations. If this message " + "still shows up your hardware isn't able to emulate the feature properly (a " + "GPU with D3D 11.0 / OGL 4.2 support is required)."); + warn_once = false; + } + + if (uid_data->per_pixel_lighting) + { + // The lighting shader only needs the two color bits of the 23bit component bit array. 
+ uid_data->components = + (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; + ; + GetLightingShaderUid(uid_data->lighting); + } + + if (uid_data->genMode_numtexgens > 0) + { + for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) + { + // optional perspective divides + uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; + } + } + + // indirect texture map lookup + int nIndirectStagesUsed = 0; + if (uid_data->genMode_numindstages > 0) + { + for (unsigned int i = 0; i < numStages; ++i) + { + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + + uid_data->nIndirectStagesUsed = nIndirectStagesUsed; + for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) + { + if (uid_data->nIndirectStagesUsed & (1 << i)) + uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i)); + } + + for (unsigned int n = 0; n < numStages; n++) + { + int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); + bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; + // HACK to handle cases where the tex gen is not enabled + if (!bHasTexCoord) + texcoord = bpmem.genMode.numtexgens; + + uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; + uid_data->stagehash[n].tevorders_texcoord = texcoord; + if (uid_data->stagehash[n].hasindstage) + uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; + + TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC; + TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC; + uid_data->stagehash[n].cc = cc.hex & 0xFFFFFF; + uid_data->stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later + + if (cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || + cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC || + cc.d == TEVCOLORARG_RASA || 
cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA || + ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) + { + const int i = bpmem.combiners[n].alphaC.rswap; + uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; + uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; + uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; + uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2; + uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); + } + + uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); + if (uid_data->stagehash[n].tevorders_enable) + { + const int i = bpmem.combiners[n].alphaC.tswap; + uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; + uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; + uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; + uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; + uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + } + + if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || + cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || + ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) + { + uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1); + uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1); + } + } + +#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) + uid_data->num_values = (uid_data->per_pixel_lighting) ? 
+ sizeof(*uid_data) : + MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); + + AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); + uid_data->Pretest = Pretest; + uid_data->late_ztest = bpmem.UseLateDepthTest(); + + // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled + // (in this case we need to write a depth value if depth test passes regardless of the alpha + // testing result) + if (uid_data->Pretest == AlphaTest::UNDETERMINED || + (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) + { + uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; + uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; + uid_data->alpha_test_logic = bpmem.alpha_test.logic; + + // ZCOMPLOC HACK: + // The only way to emulate alpha test + early-z is to force early-z in the shader. + // As this isn't available on all drivers and as we can't emulate this feature otherwise, + // we are only able to choose which one we want to respect more. + // Tests seem to have proven that writing depth even when the alpha test fails is more + // important that a reliable alpha test, so we just force the alpha test to always succeed. + // At least this seems to be less buggy. 
+ uid_data->alpha_test_use_zcomploc_hack = + bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && + !g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze; + } + + uid_data->zfreeze = bpmem.genMode.zfreeze; + uid_data->ztex_op = bpmem.ztex2.op; + uid_data->early_ztest = bpmem.UseEarlyDepthTest(); + uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; + + if (dstAlphaMode != DSTALPHA_ALPHA_PASS) + { + uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; + uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; + uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; + } + + return out; +} + +static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, + API_TYPE ApiType); +static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp, + int shift); +static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap, + bool stereo, API_TYPE ApiType); +static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType, + DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); + +ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, + const pixel_shader_uid_data* uid_data) +{ + ShaderCode out; + + u32 numStages = uid_data->genMode_numtevstages + 1; + out.Write("//Pixel Shader for TEV stages\n"); out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens, uid_data->genMode_numindstages); @@ -281,27 +447,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, - uid_data->per_pixel_lighting, ""); + GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, + ""); out.Write("};\n"); - { - const bool forced_early_z = - 
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && - (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) - // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. - // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. - && !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); - const bool per_pixel_depth = - (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || - (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || - (bpmem.zmode.testenable && bpmem.genMode.zfreeze); - - uid_data->per_pixel_depth = per_pixel_depth; - uid_data->forced_early_z = forced_early_z; - uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; - } - if (uid_data->forced_early_z) { // Zcomploc (aka early_ztest) is a way to control whether depth test is done before @@ -360,9 +509,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) warn_once = false; } - uid_data->msaa = g_ActiveConfig.iMultisamples > 1; - uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; - uid_data->stereo = g_ActiveConfig.iStereoMode > 0; if (ApiType == API_OPENGL) { out.Write("out vec4 ocol0;\n"); @@ -375,7 +521,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("in VertexData {\n"); - GenerateVSOutputMembers( + GenerateVSOutputMembers( out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); @@ -478,20 +624,13 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) "\tfloat3 ldir, h, cosAttn, distAttn;\n" "\tfloat dist, dist2, attn;\n"); - // The lighting shader only needs the two color bits of the 23bit component bit array. 
- uid_data->components = - (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; - ; - // TODO: Our current constant usage code isn't able to handle more than one buffer. // So we can't mark the VS constant as used here. But keep them here as reference. // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - - // FIXME: Disabled until pixelshadergen is split - // GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, - // "colors_", "col"); + GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, + "colors_", "col"); } // HACK to handle cases where the tex gen is not enabled @@ -506,7 +645,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) { out.Write("\tint2 fixpoint_uv%d = itrunc(", i); // optional perspective divides - uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) { out.Write("(uv%d.z == 0.0 ? 
uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i); @@ -520,24 +658,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } } - // indirect texture map lookup - int nIndirectStagesUsed = 0; - if (uid_data->genMode_numindstages > 0) - { - for (unsigned int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } - - uid_data->nIndirectStagesUsed = nIndirectStagesUsed; for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) { if (uid_data->nIndirectStagesUsed & (1 << i)) { - uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i)); - unsigned int texcoord = uid_data->GetTevindirefCoord(i); unsigned int texmap = uid_data->GetTevindirefMap(i); @@ -551,17 +675,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) out.Write("\ttempcoord = int2(0, 0);\n"); out.Write("\tint3 iindtex%d = ", i); - SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); + SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); } } for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, uid_data, i, ApiType); // build the equation for this stage - -#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) - uid_data->num_values = (uid_data->per_pixel_lighting) ? 
- sizeof(*uid_data) : - MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); + WriteStage(out, uid_data, i, ApiType); // build the equation for this stage { // The results of the last texenv stage are put onto the screen, @@ -581,18 +700,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("\tprev = prev & 255;\n"); - AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); - uid_data->Pretest = Pretest; - uid_data->late_ztest = bpmem.UseLateDepthTest(); - // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // (in this case we need to write a depth value if depth test passes regardless of the alpha // testing result) if (uid_data->Pretest == AlphaTest::UNDETERMINED || (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) - WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); - - uid_data->zfreeze = bpmem.genMode.zfreeze; + WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); if (uid_data->zfreeze) { @@ -629,10 +742,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); - uid_data->ztex_op = bpmem.ztex2.op; - uid_data->early_ztest = bpmem.UseEarlyDepthTest(); - uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; - // depth texture can safely be ignored if the result won't be written to the depth buffer // (early_ztest) and isn't used for fog either const bool skip_ztexture = !uid_data->per_pixel_depth && !uid_data->fog_fsel; @@ -674,7 +783,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) } else { - WriteFog(out, uid_data); + WriteFog(out, uid_data); out.Write("\tocol0 = float4(prev) / 255.0;\n"); } @@ -704,25 +813,20 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) return out; } -template -static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE 
ApiType) +static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, + API_TYPE ApiType) { - int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); - bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; - bool bHasIndStage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; + auto& stage = uid_data->stagehash[n]; + out.Write("\n\t// TEV stage %d\n", n); + // HACK to handle cases where the tex gen is not enabled + u32 texcoord = stage.tevorders_texcoord; + bool bHasTexCoord = texcoord < uid_data->genMode_numtexgens; if (!bHasTexCoord) texcoord = 0; - out.Write("\n\t// TEV stage %d\n", n); - - auto& stage = uid_data->stagehash[n]; - - uid_data->stagehash[n].hasindstage = bHasIndStage; - uid_data->stagehash[n].tevorders_texcoord = texcoord; if (stage.hasindstage) { - uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; TevStageIndirect tevind; tevind.hex = stage.tevind; @@ -785,7 +889,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, - stage.tevorders_texcoord, n); + texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); @@ -798,7 +902,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, - stage.tevorders_texcoord, n); + texcoord, n); out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", mtxidx, n, mtxidx); @@ -823,20 +927,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE // wrap S if (tevind.sw == ITW_OFF) - out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", stage.tevorders_texcoord); + out.Write("\twrappedcoord.x = 
fixpoint_uv%d.x;\n", texcoord); else if (tevind.sw == ITW_0) out.Write("\twrappedcoord.x = 0;\n"); else - out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", stage.tevorders_texcoord, + out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", texcoord, tevIndWrapStart[tevind.sw]); // wrap T if (tevind.tw == ITW_OFF) - out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", stage.tevorders_texcoord); + out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord); else if (tevind.tw == ITW_0) out.Write("\twrappedcoord.y = 0;\n"); else - out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", stage.tevorders_texcoord, + out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", texcoord, tevIndWrapStart[tevind.tw]); if (tevind.fb_addprev) // add previous tevcoord @@ -848,10 +952,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); } - uid_data->stagehash[n].cc = bpmem.combiners[n].colorC.hex & 0xFFFFFF; - uid_data->stagehash[n].ac = - bpmem.combiners[n].alphaC.hex & 0xFFFFF0; // Storing rswap and tswap later - TevStageCombiner::ColorCombiner cc; TevStageCombiner::AlphaCombiner ac; cc.hex = stage.cc; @@ -862,35 +962,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { - const int i = bpmem.combiners[n].alphaC.rswap; - uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap; - ac.rswap = bpmem.combiners[n].alphaC.rswap; - uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1; - uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2; - uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1; - uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2; - uid_data->stagehash[n].tevorders_colorchan = 
bpmem.tevorders[n / 2].getColorChan(n & 1); - + // Generate swizzle string to represent the Ras color channel swapping char rasswap[5] = {"rgba"[stage.tevksel_swap1a], "rgba"[stage.tevksel_swap2a], "rgba"[stage.tevksel_swap1b], "rgba"[stage.tevksel_swap2b], '\0'}; out.Write("\trastemp = %s.%s;\n", tevRasTable[stage.tevorders_colorchan], rasswap); } - uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); if (stage.tevorders_enable) { - int texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); - const int i = bpmem.combiners[n].alphaC.tswap; - uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2; - ac.tswap = bpmem.combiners[n].alphaC.tswap; - uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1; - uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2; - uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1; - uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2; - uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); + // Generate swizzle string to represent the texture color channel swapping + char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c], + "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'}; - if (!bHasIndStage) + if (!stage.hasindstage) { // calc tevcord if (bHasTexCoord) @@ -898,12 +983,9 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE else out.Write("\ttevcoord.xy = int2(0, 0);\n"); } - - char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c], - "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'}; - out.Write("\ttextemp = "); - SampleTexture(out, "float2(tevcoord.xy)", texswap, texmap, uid_data->stereo, ApiType); + SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, uid_data->stereo, + ApiType); } else { @@ -914,8 +996,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, 
API_TYPE cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) { - uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1); - uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1); out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[stage.tevksel_kc], tevKSelTableA[stage.tevksel_ka]); @@ -1014,8 +1094,8 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE out.Write(";\n"); } -template -static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift) +static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp, + int shift) { const char* tevScaleTableLeft[] = { "", // SCALE_1 @@ -1061,8 +1141,7 @@ static void WriteTevRegular(T& out, const char* components, int bias, int op, in out.Write(")%s", tevScaleTableRight[shift]); } -template -static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap, +static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap, bool stereo, API_TYPE ApiType) { out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap); @@ -1094,8 +1173,7 @@ static const char* tevAlphaFunclogicTable[] = { " == " // xnor }; -template -static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType, +static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) { static const char* alphaRef[2] = {I_ALPHA ".r", I_ALPHA ".g"}; @@ -1107,10 +1185,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api else out.Write("\tif(!( "); - uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; - uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; - uid_data->alpha_test_logic = bpmem.alpha_test.logic; - // Lookup the first component from the alpha 
function table int compindex = uid_data->alpha_test_comp0; out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); @@ -1133,16 +1207,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api out.Write("\t\tdepth = %s;\n", (ApiType == API_D3D) ? "0.0" : "1.0"); // ZCOMPLOC HACK: - // The only way to emulate alpha test + early-z is to force early-z in the shader. - // As this isn't available on all drivers and as we can't emulate this feature otherwise, - // we are only able to choose which one we want to respect more. - // Tests seem to have proven that writing depth even when the alpha test fails is more - // important that a reliable alpha test, so we just force the alpha test to always succeed. - // At least this seems to be less buggy. - uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && - !g_ActiveConfig.backend_info.bSupportsEarlyZ && - !bpmem.genMode.zfreeze; - if (!uid_data->alpha_test_use_zcomploc_hack) { out.Write("\t\tdiscard;\n"); @@ -1164,15 +1228,11 @@ static const char* tevFogFuncsTable[] = { "\tfog = 1.0 - fog;\n fog = exp2(-8.0 * fog * fog);\n" // backward exp2 }; -template -static void WriteFog(T& out, pixel_shader_uid_data* uid_data) +static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) { - uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; if (uid_data->fog_fsel == 0) return; // no Fog - uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; - out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); out.SetConstantsUsed(C_FOGI, C_FOGI); out.SetConstantsUsed(C_FOGF, C_FOGF + 1); @@ -1198,7 +1258,6 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) // ze *= x_adjust // TODO Instead of this theoretical calculation, we should use the // coefficient table given in the fog range BP registers! 
- uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if (uid_data->fog_RangeBaseEnabled) { out.SetConstantsUsed(C_FOGF, C_FOGF); @@ -1223,13 +1282,3 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data) out.Write("\tint ifog = iround(fog * 256.0);\n"); out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); } - -PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) -{ - return GeneratePixelShader(dstAlphaMode, ApiType); -} - -ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) -{ - return GeneratePixelShader(dstAlphaMode, ApiType); -} diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index ed4299e9de..5b02171129 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -87,7 +87,7 @@ struct pixel_shader_uid_data } } - inline u32 GetTevindirefCoord(int index) + inline u32 GetTevindirefCoord(int index) const { if (index == 0) { @@ -108,7 +108,7 @@ struct pixel_shader_uid_data return 0; } - inline u32 GetTevindirefMap(int index) + inline u32 GetTevindirefMap(int index) const { if (index == 0) { @@ -165,5 +165,6 @@ struct pixel_shader_uid_data typedef ShaderUid PixelShaderUid; -ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); -PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); +ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, + const pixel_shader_uid_data* uid_data); +PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode); From 95469ec225f325020221c4495b94a7b7cb2475a8 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 17 Jan 2016 01:57:59 +1300 Subject: [PATCH 10/12] Remove UID Checker. Kind of pointless now that multiple shaders with the same UID are now fundamentally impossible.
--- .../VideoBackends/D3D/GeometryShaderCache.cpp | 7 -- .../VideoBackends/D3D/GeometryShaderCache.h | 2 - .../VideoBackends/D3D/PixelShaderCache.cpp | 7 -- .../Core/VideoBackends/D3D/PixelShaderCache.h | 2 - .../VideoBackends/D3D/VertexShaderCache.cpp | 7 -- .../VideoBackends/D3D/VertexShaderCache.h | 2 - .../Core/VideoBackends/D3D12/ShaderCache.cpp | 26 ------- Source/Core/VideoBackends/Null/ShaderCache.h | 1 - .../VideoBackends/OGL/ProgramShaderCache.cpp | 19 ------ .../VideoBackends/OGL/ProgramShaderCache.h | 4 -- Source/Core/VideoCommon/ShaderGenCommon.h | 68 ------------------- 11 files changed, 145 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp index 72a9c6bb4c..bf0a75c7d8 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp @@ -26,7 +26,6 @@ namespace DX11 GeometryShaderCache::GSCache GeometryShaderCache::GeometryShaders; const GeometryShaderCache::GSCacheEntry* GeometryShaderCache::last_entry; GeometryShaderUid GeometryShaderCache::last_uid; -UidChecker GeometryShaderCache::geometry_uid_checker; const GeometryShaderCache::GSCacheEntry GeometryShaderCache::pass_entry; ID3D11GeometryShader* ClearGeometryShader = nullptr; @@ -177,7 +176,6 @@ void GeometryShaderCache::Clear() for (auto& iter : GeometryShaders) iter.second.Destroy(); GeometryShaders.clear(); - geometry_uid_checker.Invalidate(); last_entry = nullptr; } @@ -197,11 +195,6 @@ void GeometryShaderCache::Shutdown() bool GeometryShaderCache::SetShader(u32 primitive_type) { GeometryShaderUid uid = GetGeometryShaderUid(primitive_type); - if (g_ActiveConfig.bEnableShaderDebugging) - { - ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D, uid.GetUidData()); - geometry_uid_checker.AddToIndexAndCheck(code, uid, "Geometry", "g"); - } // Check if the shader is already set if (last_entry) diff --git 
a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h index b3c17cd37f..e64ce151d4 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h @@ -44,8 +44,6 @@ private: static const GSCacheEntry* last_entry; static GeometryShaderUid last_uid; static const GSCacheEntry pass_entry; - - static UidChecker geometry_uid_checker; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index e739c6ae06..144cc131c9 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -26,7 +26,6 @@ namespace DX11 PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry; PixelShaderUid PixelShaderCache::last_uid; -UidChecker PixelShaderCache::pixel_uid_checker; LinearDiskCache g_ps_disk_cache; @@ -517,7 +516,6 @@ void PixelShaderCache::Clear() for (auto& iter : PixelShaders) iter.second.Destroy(); PixelShaders.clear(); - pixel_uid_checker.Invalidate(); last_entry = nullptr; } @@ -557,11 +555,6 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) { PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode); - if (g_ActiveConfig.bEnableShaderDebugging) - { - ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData()); - pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); - } // Check if the shader is already set if (last_entry) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h index e985374f97..1ca764f646 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.h @@ -53,8 +53,6 @@ private: static PSCache PixelShaders; static const PSCacheEntry* last_entry; static 
PixelShaderUid last_uid; - - static UidChecker pixel_uid_checker; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp index 30367724de..990eb9b284 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp @@ -24,7 +24,6 @@ namespace DX11 VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry; VertexShaderUid VertexShaderCache::last_uid; -UidChecker VertexShaderCache::vertex_uid_checker; static ID3D11VertexShader* SimpleVertexShader = nullptr; static ID3D11VertexShader* ClearVertexShader = nullptr; @@ -177,7 +176,6 @@ void VertexShaderCache::Clear() for (auto& iter : vshaders) iter.second.Destroy(); vshaders.clear(); - vertex_uid_checker.Invalidate(); last_entry = nullptr; } @@ -200,11 +198,6 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader() { VertexShaderUid uid = GetVertexShaderUid(); - if (g_ActiveConfig.bEnableShaderDebugging) - { - ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData()); - vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v"); - } if (last_entry) { diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.h b/Source/Core/VideoBackends/D3D/VertexShaderCache.h index d680fca6de..2f691a5172 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.h +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.h @@ -58,8 +58,6 @@ private: static VSCache vshaders; static const VSCacheEntry* last_entry; static VertexShaderUid last_uid; - - static UidChecker vertex_uid_checker; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 5fe90bd410..d57d61073e 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -42,10 +42,6 @@ static 
LinearDiskCache s_gs_disk_cache; static LinearDiskCache s_ps_disk_cache; static LinearDiskCache s_vs_disk_cache; -static UidChecker s_geometry_uid_checker; -static UidChecker s_pixel_uid_checker; -static UidChecker s_vertex_uid_checker; - static D3D12_SHADER_BYTECODE s_last_geometry_shader_bytecode; static D3D12_SHADER_BYTECODE s_last_pixel_shader_bytecode; static D3D12_SHADER_BYTECODE s_last_vertex_shader_bytecode; @@ -152,10 +148,6 @@ void ShaderCache::Shutdown() s_ps_hlsl_cache.clear(); s_vs_hlsl_cache.clear(); } - - s_geometry_uid_checker.Invalidate(); - s_pixel_uid_checker.Invalidate(); - s_vertex_uid_checker.Invalidate(); } void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 gs_primitive_type) @@ -217,12 +209,6 @@ void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_t { s_last_geometry_shader_uid = gs_uid; - if (g_ActiveConfig.bEnableShaderDebugging) - { - ShaderCode code = GenerateGeometryShaderCode(gs_primitive_type, API_D3D, gs_uid.GetUidData()); - s_geometry_uid_checker.AddToIndexAndCheck(code, gs_uid, "Geometry", "g"); - } - if (gs_uid.GetUidData()->IsPassthrough()) { s_last_geometry_shader_bytecode = {}; @@ -261,12 +247,6 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ { s_last_pixel_shader_uid = ps_uid; - if (g_ActiveConfig.bEnableShaderDebugging) - { - ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData()); - s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p"); - } - auto ps_iterator = s_ps_bytecode_cache.find(ps_uid); if (ps_iterator != s_ps_bytecode_cache.end()) { @@ -302,12 +282,6 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid) { s_last_vertex_shader_uid = vs_uid; - if (g_ActiveConfig.bEnableShaderDebugging) - { - ShaderCode code = GenerateVertexShaderCode(API_D3D, vs_uid.GetUidData()); - s_vertex_uid_checker.AddToIndexAndCheck(code, vs_uid, "Vertex", "v"); - } - auto vs_iterator = 
s_vs_bytecode_cache.find(vs_uid); if (vs_iterator != s_vs_bytecode_cache.end()) { diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index 7cd3e2863e..a988f81ce9 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -33,7 +33,6 @@ private: std::map m_shaders; const std::string* m_last_entry = nullptr; Uid m_last_uid; - UidChecker m_uid_checker; }; class VertexShaderCache : public ShaderCache diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 2a8accd5b9..06ce67acfc 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -36,9 +36,6 @@ static GLuint CurrentProgram = 0; ProgramShaderCache::PCache ProgramShaderCache::pshaders; ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry; SHADERUID ProgramShaderCache::last_uid; -UidChecker ProgramShaderCache::pixel_uid_checker; -UidChecker ProgramShaderCache::vertex_uid_checker; -UidChecker ProgramShaderCache::geometry_uid_checker; static std::string s_glsl_header = ""; @@ -400,19 +397,6 @@ void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, uid->puid = GetPixelShaderUid(dstAlphaMode); uid->vuid = GetVertexShaderUid(); uid->guid = GetGeometryShaderUid(primitive_type); - - if (g_ActiveConfig.bEnableShaderDebugging) - { - ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid->puid.GetUidData()); - pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); - - ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData()); - vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v"); - - ShaderCode gcode = - GenerateGeometryShaderCode(primitive_type, API_OPENGL, uid->guid.GetUidData()); - geometry_uid_checker.AddToIndexAndCheck(gcode, uid->guid, "Geometry", "g"); - } } 
ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram() @@ -517,9 +501,6 @@ void ProgramShaderCache::Shutdown() } pshaders.clear(); - pixel_uid_checker.Invalidate(); - vertex_uid_checker.Invalidate(); - s_buffer.reset(); } diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index 4852d71b86..4c97da20e2 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -88,10 +88,6 @@ private: static PCacheEntry* last_entry; static SHADERUID last_uid; - static UidChecker pixel_uid_checker; - static UidChecker vertex_uid_checker; - static UidChecker geometry_uid_checker; - static u32 s_ubo_buffer_size; static s32 s_ubo_align; }; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index dbd6ce8823..d3e0bb1ec5 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -154,74 +154,6 @@ public: private: std::vector constant_usage; // TODO: Is vector appropriate here? 
}; -/** - * Checks if there has been - */ -template -class UidChecker -{ -public: - void Invalidate() - { - m_shaders.clear(); - m_uids.clear(); - } - - void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type, - const char* dump_prefix) - { - bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end(); - if (!uid_is_indexed) - { - m_uids.push_back(new_uid); - m_shaders[new_uid] = new_code.GetBuffer(); - } - else - { - // uid is already in the index => check if there's a shader with the same uid but different - // code - auto& old_code = m_shaders[new_uid]; - if (old_code != new_code.GetBuffer()) - { - static int num_failures = 0; - - std::string temp = - StringFromFormat("%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), - dump_prefix, ++num_failures); - - // TODO: Should also dump uids - std::ofstream file; - OpenFStream(file, temp, std::ios_base::out); - file << "Old shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code.GetBuffer(); - file << "\n\nShader uid:\n"; - for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i) - { - u8 value = new_uid.GetUidDataRaw()[i]; - if ((i % 4) == 0) - { - auto last_value = - (i + 3 < new_uid.GetUidDataSize() - 1) ? i + 3 : new_uid.GetUidDataSize(); - file << std::setfill(' ') << std::dec; - file << "Values " << std::setw(2) << i << " - " << last_value << ": "; - } - - file << std::setw(2) << std::setfill('0') << std::hex << value << std::setw(1); - if ((i % 4) < 3) - file << ' '; - else - file << std::endl; - } - - ERROR_LOG(VIDEO, "%s shader uid mismatch! 
See %s for details", shader_type, temp.c_str()); - } - } - } - -private: - std::map m_shaders; - std::vector m_uids; -}; template inline void DefineOutputMember(T& object, API_TYPE api_type, const char* qualifier, From ebe5fd0b36556a39ac25052966261b56d90ea1ae Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 28 Feb 2016 09:46:58 +1300 Subject: [PATCH 11/12] Multithreadded Shadergen: Minor fixups. --- .../VideoBackends/D3D/GeometryShaderCache.cpp | 2 +- .../Core/VideoBackends/D3D12/ShaderCache.cpp | 3 +-- .../Core/VideoBackends/Null/ShaderCache.cpp | 2 +- Source/Core/VideoBackends/Null/ShaderCache.h | 11 ++++----- .../VideoBackends/OGL/ProgramShaderCache.cpp | 2 +- Source/Core/VideoCommon/GeometryShaderGen.cpp | 23 +++++++++---------- Source/Core/VideoCommon/GeometryShaderGen.h | 3 +-- Source/Core/VideoCommon/PixelShaderGen.h | 6 ++--- Source/Core/VideoCommon/VertexShaderGen.h | 2 +- 9 files changed, 25 insertions(+), 29 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp index bf0a75c7d8..bacca56a7b 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp @@ -228,7 +228,7 @@ bool GeometryShaderCache::SetShader(u32 primitive_type) } // Need to compile a new shader - ShaderCode code = GenerateGeometryShaderCode(primitive_type, API_D3D, uid.GetUidData()); + ShaderCode code = GenerateGeometryShaderCode(API_D3D, uid.GetUidData()); D3DBlob* pbytecode; if (!D3D::CompileGeometryShader(code.GetBuffer(), &pbytecode)) diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index d57d61073e..13a7c99465 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -222,8 +222,7 @@ void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_t } else { - ShaderCode gs_code = - 
GenerateGeometryShaderCode(gs_primitive_type, API_D3D, gs_uid.GetUidData()); + ShaderCode gs_code = GenerateGeometryShaderCode(API_D3D, gs_uid.GetUidData()); ID3DBlob* gs_bytecode = nullptr; if (!D3D::CompileGeometryShader(gs_code.GetBuffer(), &gs_bytecode)) diff --git a/Source/Core/VideoBackends/Null/ShaderCache.cpp b/Source/Core/VideoBackends/Null/ShaderCache.cpp index 19dcc778d4..a08ef6952b 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.cpp +++ b/Source/Core/VideoBackends/Null/ShaderCache.cpp @@ -59,7 +59,7 @@ bool ShaderCache::SetShader(DSTALPHA_MODE dst_alpha_mode, u32 primitive_typ } // Need to compile a new shader - ShaderCode code = GenerateCode(dst_alpha_mode, primitive_type, API_OPENGL, uid); + ShaderCode code = GenerateCode(dst_alpha_mode, API_OPENGL, uid); m_shaders.emplace(uid, code.GetBuffer()); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index a988f81ce9..2dcdaf85b7 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -26,8 +26,7 @@ public: protected: virtual Uid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type) = 0; - virtual ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, - API_TYPE api_type, Uid uid) = 0; + virtual ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, API_TYPE api_type, Uid uid) = 0; private: std::map m_shaders; @@ -46,7 +45,7 @@ protected: { return GetVertexShaderUid(); } - ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, + ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, API_TYPE api_type, VertexShaderUid uid) override { return GenerateVertexShaderCode(api_type, uid.GetUidData()); @@ -64,10 +63,10 @@ protected: { return GetGeometryShaderUid(primitive_type); } - ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE 
api_type, + ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, API_TYPE api_type, GeometryShaderUid uid) override { - return GenerateGeometryShaderCode(primitive_type, api_type, uid.GetUidData()); + return GenerateGeometryShaderCode(api_type, uid.GetUidData()); } }; @@ -82,7 +81,7 @@ protected: { return GetPixelShaderUid(dst_alpha_mode); } - ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, + ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, API_TYPE api_type, PixelShaderUid uid) override { return GeneratePixelShaderCode(dst_alpha_mode, api_type, uid.GetUidData()); diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 06ce67acfc..4425444b8d 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -211,7 +211,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) - gcode = GenerateGeometryShaderCode(primitive_type, API_OPENGL, uid.guid.GetUidData()); + gcode = GenerateGeometryShaderCode(API_OPENGL, uid.guid.GetUidData()); if (g_ActiveConfig.bEnableShaderDebugging) { diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index f509072f8e..a137b7ff06 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -42,14 +42,13 @@ static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data static void EndPrimitive(ShaderCode& out, const geometry_shader_uid_data* uid_data, API_TYPE ApiType); -ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, - const geometry_shader_uid_data* uid_data) +ShaderCode GenerateGeometryShaderCode(API_TYPE ApiType, const geometry_shader_uid_data* uid_data) { 
ShaderCode out; // Non-uid template parameters will write to the dummy data (=> gets optimized out) - const unsigned int vertex_in = primitive_type + 1; - unsigned int vertex_out = primitive_type == PRIMITIVE_TRIANGLES ? 3 : 4; + const unsigned int vertex_in = uid_data->primitive_type + 1; + unsigned int vertex_out = uid_data->primitive_type == PRIMITIVE_TRIANGLES ? 3 : 4; if (uid_data->wireframe) vertex_out++; @@ -59,14 +58,14 @@ ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, // Insert layout parameters if (g_ActiveConfig.backend_info.bSupportsGSInstancing) { - out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[primitive_type], + out.Write("layout(%s, invocations = %d) in;\n", primitives_ogl[uid_data->primitive_type], uid_data->stereo ? 2 : 1); out.Write("layout(%s_strip, max_vertices = %d) out;\n", uid_data->wireframe ? "line" : "triangle", vertex_out); } else { - out.Write("layout(%s) in;\n", primitives_ogl[primitive_type]); + out.Write("layout(%s) in;\n", primitives_ogl[uid_data->primitive_type]); out.Write("layout(%s_strip, max_vertices = %d) out;\n", uid_data->wireframe ? "line" : "triangle", uid_data->stereo ? vertex_out * 2 : vertex_out); @@ -130,21 +129,21 @@ ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, out.Write("[maxvertexcount(%d)]\n[instance(%d)]\n", vertex_out, uid_data->stereo ? 2 : 1); out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream output, in uint " "InstanceID : SV_GSInstanceID)\n{\n", - primitives_d3d[primitive_type], vertex_in, + primitives_d3d[uid_data->primitive_type], vertex_in, uid_data->wireframe ? "Line" : "Triangle"); } else { out.Write("[maxvertexcount(%d)]\n", uid_data->stereo ? vertex_out * 2 : vertex_out); out.Write("void main(%s VS_OUTPUT o[%d], inout %sStream output)\n{\n", - primitives_d3d[primitive_type], vertex_in, + primitives_d3d[uid_data->primitive_type], vertex_in, uid_data->wireframe ? 
"Line" : "Triangle"); } out.Write("\tVertexData ps;\n"); } - if (primitive_type == PRIMITIVE_LINES) + if (uid_data->primitive_type == PRIMITIVE_LINES) { if (ApiType == API_OPENGL) { @@ -175,7 +174,7 @@ ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, "\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" "\t}\n"); } - else if (primitive_type == PRIMITIVE_POINTS) + else if (uid_data->primitive_type == PRIMITIVE_POINTS) { if (ApiType == API_OPENGL) { @@ -235,7 +234,7 @@ ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, out.Write("\tf.pos.x += " I_STEREOPARAMS "[eye] * (f.pos.w - " I_STEREOPARAMS "[2]);\n"); } - if (primitive_type == PRIMITIVE_LINES) + if (uid_data->primitive_type == PRIMITIVE_LINES) { out.Write("\tVS_OUTPUT l = f;\n" "\tVS_OUTPUT r = f;\n"); @@ -256,7 +255,7 @@ ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, EmitVertex(out, uid_data, "l", ApiType, true); EmitVertex(out, uid_data, "r", ApiType); } - else if (primitive_type == PRIMITIVE_POINTS) + else if (uid_data->primitive_type == PRIMITIVE_POINTS) { out.Write("\tVS_OUTPUT ll = f;\n" "\tVS_OUTPUT lr = f;\n" diff --git a/Source/Core/VideoCommon/GeometryShaderGen.h b/Source/Core/VideoCommon/GeometryShaderGen.h index 0d10078836..181bc502e1 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.h +++ b/Source/Core/VideoCommon/GeometryShaderGen.h @@ -31,6 +31,5 @@ struct geometry_shader_uid_data typedef ShaderUid GeometryShaderUid; -ShaderCode GenerateGeometryShaderCode(u32 primitive_type, API_TYPE ApiType, - const geometry_shader_uid_data* uid_data); +ShaderCode GenerateGeometryShaderCode(API_TYPE ApiType, const geometry_shader_uid_data* uid_data); GeometryShaderUid GetGeometryShaderUid(u32 primitive_type); diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 5b02171129..b222a1958f 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ 
b/Source/Core/VideoCommon/PixelShaderGen.h @@ -63,7 +63,7 @@ struct pixel_shader_uid_data u32 tevindref_bi4 : 3; u32 tevindref_bc4 : 3; - inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap) + void SetTevindrefValues(int index, u32 texcoord, u32 texmap) { if (index == 0) { @@ -87,7 +87,7 @@ struct pixel_shader_uid_data } } - inline u32 GetTevindirefCoord(int index) const + u32 GetTevindirefCoord(int index) const { if (index == 0) { @@ -108,7 +108,7 @@ struct pixel_shader_uid_data return 0; } - inline u32 GetTevindirefMap(int index) const + u32 GetTevindirefMap(int index) const { if (index == 0) { diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 27ca46e19d..e8dfba2e1c 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -39,9 +39,9 @@ struct vertex_shader_uid_data u32 pixel_lighting : 1; u32 msaa : 1; - u32 ssaa : 1; u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is // 8 bits wide + u32 ssaa : 1; u32 pad : 15; struct From 2f134c5c36c3a9c3c7a618635d98d0bf2191b9f2 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 28 Feb 2016 10:01:25 +1300 Subject: [PATCH 12/12] Remove the rest of ShaderDebugging. Without UID checking, it's basically a no-op that disables shader cache and stores the shader source code (without ever reading it back). 
--- .../VideoBackends/D3D/GeometryShaderCache.cpp | 8 ----- .../VideoBackends/D3D/GeometryShaderCache.h | 2 -- .../VideoBackends/D3D/PixelShaderCache.cpp | 8 ----- .../Core/VideoBackends/D3D/PixelShaderCache.h | 2 -- .../VideoBackends/D3D/VertexShaderCache.cpp | 8 ----- .../VideoBackends/D3D/VertexShaderCache.h | 2 -- .../Core/VideoBackends/D3D12/ShaderCache.cpp | 34 ------------------- .../VideoBackends/OGL/ProgramShaderCache.cpp | 11 ++---- Source/Core/VideoCommon/GeometryShaderGen.cpp | 1 - Source/Core/VideoCommon/VideoConfig.cpp | 14 -------- Source/Core/VideoCommon/VideoConfig.h | 3 -- 11 files changed, 2 insertions(+), 91 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp index bacca56a7b..1452e617e3 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp @@ -164,9 +164,6 @@ void GeometryShaderCache::Init() GeometryShaderCacheInserter inserter; g_gs_disk_cache.OpenAndRead(cache_filename, inserter); - if (g_Config.bEnableShaderDebugging) - Clear(); - last_entry = nullptr; } @@ -243,11 +240,6 @@ bool GeometryShaderCache::SetShader(u32 primitive_type) bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size()); pbytecode->Release(); - if (g_ActiveConfig.bEnableShaderDebugging && success) - { - GeometryShaders[uid].code = code.GetBuffer(); - } - return success; } diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h index e64ce151d4..f0cab99940 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h @@ -32,8 +32,6 @@ private: { ID3D11GeometryShader* shader; - std::string code; - GSCacheEntry() : shader(nullptr) {} void Destroy() { SAFE_RELEASE(shader); } }; diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp 
b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 144cc131c9..7e6bc036e7 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -504,9 +504,6 @@ void PixelShaderCache::Init() PixelShaderCacheInserter inserter; g_ps_disk_cache.OpenAndRead(cache_filename, inserter); - if (g_Config.bEnableShaderDebugging) - Clear(); - last_entry = nullptr; } @@ -596,11 +593,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size()); pbytecode->Release(); - if (g_ActiveConfig.bEnableShaderDebugging && success) - { - PixelShaders[uid].code = code.GetBuffer(); - } - GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); return success; } diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h index 1ca764f646..0e81f3cf33 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.h @@ -42,8 +42,6 @@ private: { ID3D11PixelShader* shader; - std::string code; - PSCacheEntry() : shader(nullptr) {} void Destroy() { SAFE_RELEASE(shader); } }; diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp index 990eb9b284..cdcff17d02 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp @@ -165,9 +165,6 @@ void VertexShaderCache::Init() VertexShaderCacheInserter inserter; g_vs_disk_cache.OpenAndRead(cache_filename, inserter); - if (g_Config.bEnableShaderDebugging) - Clear(); - last_entry = nullptr; } @@ -235,11 +232,6 @@ bool VertexShaderCache::SetShader() bool success = InsertByteCode(uid, pbytecode); pbytecode->Release(); - if (g_ActiveConfig.bEnableShaderDebugging && success) - { - vshaders[uid].code = code.GetBuffer(); - } - GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return success; } diff 
--git a/Source/Core/VideoBackends/D3D/VertexShaderCache.h b/Source/Core/VideoBackends/D3D/VertexShaderCache.h index 2f691a5172..7471d4f769 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.h +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.h @@ -38,8 +38,6 @@ private: ID3D11VertexShader* shader; D3DBlob* bytecode; // needed to initialize the input layout - std::string code; - VSCacheEntry() : shader(nullptr), bytecode(nullptr) {} void SetByteCode(D3DBlob* blob) { diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 13a7c99465..e1803111d3 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -30,14 +30,6 @@ VsBytecodeCache s_vs_bytecode_cache; // Used to keep track of blobs to release at Shutdown time. static std::vector s_shader_blob_list; -// Only used for shader debugging.. -using GsHlslCache = std::map; -using PsHlslCache = std::map; -using VsHlslCache = std::map; -static GsHlslCache s_gs_hlsl_cache; -static PsHlslCache s_ps_hlsl_cache; -static VsHlslCache s_vs_hlsl_cache; - static LinearDiskCache s_gs_disk_cache; static LinearDiskCache s_ps_disk_cache; static LinearDiskCache s_vs_disk_cache; @@ -100,10 +92,6 @@ void ShaderCache::Init() ShaderCacheInserter vs_inserter; s_vs_disk_cache.OpenAndRead(vs_cache_filename, vs_inserter); - // Clear out cache when debugging shaders to ensure stale ones don't stick around.. 
- if (g_Config.bEnableShaderDebugging) - Clear(); - SETSTAT(stats.numPixelShadersAlive, static_cast(s_ps_bytecode_cache.size())); SETSTAT(stats.numPixelShadersCreated, static_cast(s_ps_bytecode_cache.size())); SETSTAT(stats.numVertexShadersAlive, static_cast(s_vs_bytecode_cache.size())); @@ -141,13 +129,6 @@ void ShaderCache::Shutdown() s_ps_disk_cache.Close(); s_vs_disk_cache.Sync(); s_vs_disk_cache.Close(); - - if (g_Config.bEnableShaderDebugging) - { - s_gs_hlsl_cache.clear(); - s_ps_hlsl_cache.clear(); - s_vs_hlsl_cache.clear(); - } } void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 gs_primitive_type) @@ -234,11 +215,6 @@ void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_t s_last_geometry_shader_bytecode = InsertByteCode(gs_uid, &s_gs_bytecode_cache, gs_bytecode); s_gs_disk_cache.Append(gs_uid, reinterpret_cast(gs_bytecode->GetBufferPointer()), static_cast(gs_bytecode->GetBufferSize())); - - if (g_ActiveConfig.bEnableShaderDebugging) - { - s_gs_hlsl_cache[gs_uid] = gs_code.GetBuffer(); - } } } @@ -269,11 +245,6 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_ SETSTAT(stats.numPixelShadersAlive, static_cast(s_ps_bytecode_cache.size())); INCSTAT(stats.numPixelShadersCreated); - - if (g_ActiveConfig.bEnableShaderDebugging) - { - s_ps_hlsl_cache[ps_uid] = ps_code.GetBuffer(); - } } } @@ -304,11 +275,6 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid) SETSTAT(stats.numVertexShadersAlive, static_cast(s_vs_bytecode_cache.size())); INCSTAT(stats.numVertexShadersCreated); - - if (g_ActiveConfig.bEnableShaderDebugging) - { - s_vs_hlsl_cache[vs_uid] = vs_code.GetBuffer(); - } } } diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 4425444b8d..58bd6c2a7b 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -213,13 +213,6 
@@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ !uid.guid.GetUidData()->IsPassthrough()) gcode = GenerateGeometryShaderCode(API_OPENGL, uid.guid.GetUidData()); - if (g_ActiveConfig.bEnableShaderDebugging) - { - newentry.shader.strvprog = vcode.GetBuffer(); - newentry.shader.strpprog = pcode.GetBuffer(); - newentry.shader.strgprog = gcode.GetBuffer(); - } - #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { @@ -421,7 +414,7 @@ void ProgramShaderCache::Init() s_buffer = StreamBuffer::Create(GL_UNIFORM_BUFFER, UBO_LENGTH); // Read our shader cache, only if supported - if (g_ogl_config.bSupportsGLSLCache && !g_Config.bEnableShaderDebugging) + if (g_ogl_config.bSupportsGLSLCache) { GLint Supported; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &Supported); @@ -455,7 +448,7 @@ void ProgramShaderCache::Init() void ProgramShaderCache::Shutdown() { // store all shaders in cache on disk - if (g_ogl_config.bSupportsGLSLCache && !g_Config.bEnableShaderDebugging) + if (g_ogl_config.bSupportsGLSLCache) { for (auto& entry : pshaders) { diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index a137b7ff06..f3562254b9 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -85,7 +85,6 @@ ShaderCode GenerateGeometryShaderCode(API_TYPE ApiType, const geometry_shader_ui "\tint4 " I_TEXOFFSET ";\n" "};\n"); - out.Write("struct VS_OUTPUT {\n"); GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, ""); diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 753a65f9eb..c19528d7d9 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -79,7 +79,6 @@ void VideoConfig::Load(const std::string& ini_file) settings->Get("TexFmtOverlayCenter", &bTexFmtOverlayCenter, 0); 
settings->Get("WireFrame", &bWireFrame, 0); settings->Get("DisableFog", &bDisableFog, 0); - settings->Get("EnableShaderDebugging", &bEnableShaderDebugging, false); settings->Get("BorderlessFullscreen", &bBorderlessFullscreen, false); settings->Get("SWZComploc", &bZComploc, true); @@ -120,18 +119,6 @@ void VideoConfig::Load(const std::string& ini_file) interface->Get("UsePanicHandlers", &bTmp, true); SetEnableAlert(bTmp); - // Shader Debugging causes a huge slowdown and it's easy to forget about it - // since it's not exposed in the settings dialog. It's only used by - // developers, so displaying an obnoxious message avoids some confusion and - // is not too annoying/confusing for users. - // - // XXX(delroth): This is kind of a bad place to put this, but the current - // VideoCommon is a mess and we don't have a central initialization - // function to do these kind of checks. Instead, the init code is - // triplicated for each video backend. - if (bEnableShaderDebugging) - OSD::AddMessage("Warning: Shader Debugging is enabled, performance will suffer heavily", 15000); - VerifyValidity(); } @@ -299,7 +286,6 @@ void VideoConfig::Save(const std::string& ini_file) settings->Set("TexFmtOverlayCenter", bTexFmtOverlayCenter); settings->Set("Wireframe", bWireFrame); settings->Set("DisableFog", bDisableFog); - settings->Set("EnableShaderDebugging", bEnableShaderDebugging); settings->Set("BorderlessFullscreen", bBorderlessFullscreen); settings->Set("SWZComploc", bZComploc); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index d39bfdd118..d78c63ba90 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -132,9 +132,6 @@ struct VideoConfig final // D3D only config, mostly to be merged into the above int iAdapter; - // Debugging - bool bEnableShaderDebugging; - // VideoSW Debugging int drawStart; int drawEnd;