diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 63acab5517..7d11645949 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -457,7 +457,7 @@ std::unique_ptr ShaderCache::CompilePixelUberShader(const UberShader::PixelShaderUid& uid) const { const ShaderCode source_code = - UberShader::GenPixelShader(m_api_type, m_host_config, uid.GetUidData()); + UberShader::GenPixelShader(m_api_type, m_host_config, uid.GetUidData(), {}); return g_gfx->CreateShaderFromSource(ShaderStage::Pixel, source_code.GetBuffer(), fmt::to_string(*uid.GetUidData())); } diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index f54e42bf60..278be9c082 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -17,6 +17,257 @@ namespace UberShader { +namespace +{ +void WriteCustomShaderStructImpl(ShaderCode* out, u32 num_texgen, bool per_pixel_lighting) +{ + out->Write("\tCustomShaderData custom_data;\n"); + if (per_pixel_lighting) + { + out->Write("\tcustom_data.position = WorldPos;\n"); + out->Write("\tcustom_data.normal = Normal;\n"); + } + else + { + out->Write("\tcustom_data.position = float3(0, 0, 0);\n"); + out->Write("\tcustom_data.normal = float3(0, 0, 0);\n"); + } + + if (num_texgen == 0) [[unlikely]] + { + out->Write("\tcustom_data.texcoord[0] = float3(0, 0, 0);\n"); + } + else + { + for (u32 i = 0; i < num_texgen; ++i) + { + out->Write("\tif (tex{0}.z == 0.0)\n", i); + out->Write("\t{{\n"); + out->Write("\t\tcustom_data.texcoord[{0}] = tex{0};\n", i); + out->Write("\t}}\n"); + out->Write("\telse {{\n"); + out->Write("\t\tcustom_data.texcoord[{0}] = float3(tex{0}.xy / tex{0}.z, 0);\n", i); + out->Write("\t}}\n"); + } + } + + out->Write("\tcustom_data.texcoord_count = {};\n", num_texgen); + + for (u32 i = 0; i < 8; i++) + { + // Shader compilation complains if every index isn't initialized 
+ out->Write("\tcustom_data.texmap_to_texcoord_index[{0}] = {0};\n", i); + } + + for (u32 i = 0; i < NUM_XF_COLOR_CHANNELS; i++) + { + out->Write("\tcustom_data.base_material[{}] = vec4(0, 0, 0, 1);\n", i); + out->Write("\tcustom_data.ambient_lighting[{}] = vec4(0, 0, 0, 1);\n", i); + + // Shader compilation errors can throw if not everything is initialized + for (u32 light_count_index = 0; light_count_index < 8; light_count_index++) + { + // Color + out->Write("\tcustom_data.lights_chan{}_color[{}].direction = float3(0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_color[{}].position = float3(0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_color[{}].color = float3(0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_color[{}].cosatt = float4(0, 0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_color[{}].distatt = float4(0, 0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_color[{}].attenuation_type = 0;\n", i, + light_count_index); + + // Alpha + out->Write("\tcustom_data.lights_chan{}_alpha[{}].direction = float3(0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_alpha[{}].position = float3(0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_alpha[{}].color = float3(0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_alpha[{}].cosatt = float4(0, 0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_alpha[{}].distatt = float4(0, 0, 0, 0);\n", i, + light_count_index); + out->Write("\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = 0;\n", i, + light_count_index); + } + + out->Write("\tcustom_data.light_chan{}_color_count = 0;\n", i); + out->Write("\tcustom_data.light_chan{}_alpha_count = 0;\n", i); + } + + if (num_texgen > 0) [[likely]] + { + out->Write("\n"); + out->Write("\tfor(uint stage = 
0u; stage <= num_stages; stage++)\n"); + out->Write("\t{{\n"); + out->Write("\t\tStageState ss;\n"); + out->Write("\t\tss.order = bpmem_tevorder(stage>>1);\n"); + out->Write("\t\tif ((stage & 1u) == 1u)\n"); + out->Write("\t\t\tss.order = ss.order >> {};\n\n", + int(TwoTevStageOrders().enable_tex_odd.StartBit() - + TwoTevStageOrders().enable_tex_even.StartBit())); + out->Write("\t\tuint texmap = {};\n", + BitfieldExtract<&TwoTevStageOrders::texcoord_even>("ss.order")); + // Shader compilation is weird, shader arrays can't use indexing by variable + // to set values unless the variable is an index in a for loop. + // So instead we have to do this if check nonsense + for (u32 i = 0; i < 8; i++) + { + out->Write("\t\tif (texmap == {})\n", i); + out->Write("\t\t{{\n"); + out->Write("\t\t\tcustom_data.texmap_to_texcoord_index[{}] = selectTexCoordIndex(texmap);\n", + i); + out->Write("\t\t}}\n"); + } + out->Write("\t}}\n"); + } + + out->Write("\tuint light_count = 0;\n"); + out->Write("\tfor (uint chan = 0u; chan < {}u; chan++)\n", NUM_XF_COLOR_CHANNELS); + out->Write("\t{{\n"); + out->Write("\t\tuint colorreg = xfmem_color(chan);\n"); + out->Write("\t\tuint alphareg = xfmem_alpha(chan);\n"); + for (const std::string_view color_type : std::array<std::string_view, 2>{"colorreg", "alphareg"}) + { + if (color_type == "colorreg") // string_view compare: by content, not by literal address + { + out->Write("\t\tcustom_data.base_material[0] = " I_MATERIALS "[2u] / 255.0; \n"); + out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type)); + out->Write("\t\t\tcustom_data.base_material[0] = colors_0; \n"); + } + else + { + out->Write("custom_data.base_material[1].w = " I_MATERIALS "[3u].w / 255.0; \n"); + out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type)); + out->Write("\t\t\tcustom_data.base_material[1].w = colors_1.w; \n"); + } + out->Write("\t\tif ({} != 0u)\n", BitfieldExtract<&LitChannel::enablelighting>(color_type)); + out->Write("\t\t{{\n"); + out->Write("\t\t\tuint light_mask = {} | ({} << 
4u);\n", + BitfieldExtract<&LitChannel::lightMask0_3>(color_type), + BitfieldExtract<&LitChannel::lightMask4_7>(color_type)); + out->Write("\t\t\tuint attnfunc = {};\n", BitfieldExtract<&LitChannel::attnfunc>(color_type)); + out->Write("\t\t\tfor (uint light_index = 0u; light_index < 8u; light_index++)\n"); + out->Write("\t\t\t{{\n"); + out->Write("\t\t\t\tif ((light_mask & (1u << light_index)) != 0u)\n"); + out->Write("\t\t\t\t{{\n"); + // Shader compilation is weird, shader arrays can't use indexing by variable + // to set values unless the variable is an index in a for loop. + // So instead we have to do this if check nonsense + for (u32 light_count_index = 0; light_count_index < 8; light_count_index++) + { + out->Write("\t\t\t\t\tif (light_index == {})\n", light_count_index); + out->Write("\t\t\t\t\t{{\n"); + if (color_type == "colorreg") + { + for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++) + { + out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index); + out->Write("\t\t\t\t\t\t{{\n"); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].direction = " I_LIGHTS + "[light_index].dir.xyz;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].position = " I_LIGHTS + "[light_index].pos.xyz;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].cosatt = " I_LIGHTS + "[light_index].cosatt;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].distatt = " I_LIGHTS + "[light_index].distatt;\n", + channel_index, light_count_index); + out->Write( + "\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].attenuation_type = attnfunc;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_color[{}].color = " I_LIGHTS + "[light_index].color.rgb / float3(255.0, 255.0, 255.0);\n", + channel_index, light_count_index); + 
out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_color_count += 1;\n", channel_index); + out->Write("\t\t\t\t\t\t}}\n"); + } + } + else + { + for (u32 channel_index = 0; channel_index < NUM_XF_COLOR_CHANNELS; channel_index++) + { + out->Write("\t\t\t\t\t\tif (chan == {})\n", channel_index); + out->Write("\t\t\t\t\t\t{{\n"); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].direction = " I_LIGHTS + "[light_index].dir.xyz;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].position = " I_LIGHTS + "[light_index].pos.xyz;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].cosatt = " I_LIGHTS + "[light_index].cosatt;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].distatt = " I_LIGHTS + "[light_index].distatt;\n", + channel_index, light_count_index); + out->Write( + "\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].attenuation_type = attnfunc;\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.lights_chan{}_alpha[{}].color = float3(" I_LIGHTS + "[light_index].color.a) / float3(255.0, 255.0, 255.0);\n", + channel_index, light_count_index); + out->Write("\t\t\t\t\t\t\tcustom_data.light_chan{}_alpha_count += 1;\n", channel_index); + out->Write("\t\t\t\t\t\t}}\n"); + } + } + + out->Write("\t\t\t\t\t}}\n"); + } + out->Write("\t\t\t\t}}\n"); + out->Write("\t\t\t}}\n"); + out->Write("\t\t}}\n"); + } + out->Write("\t}}\n"); + + for (u32 i = 0; i < 16; i++) + { + // Shader compilation complains if every struct isn't initialized + + // Color Input + for (u32 j = 0; j < 4; j++) + { + out->Write("\tcustom_data.tev_stages[{}].input_color[{}].input_type = " + "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n", + i, j); + out->Write("\tcustom_data.tev_stages[{}].input_color[{}].value = " + "float3(0, 0, 0);\n", + i, j); + } + + // Alpha Input + for (u32 j = 0; j < 4; j++) + { + 
out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].input_type = " + "CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_UNUSED;\n", + i, j); + out->Write("\tcustom_data.tev_stages[{}].input_alpha[{}].value = " + "float(0);\n", + i, j); + } + + // Texmap + out->Write("\tcustom_data.tev_stages[{}].texmap = 0u;\n", i); + + // Output + out->Write("\tcustom_data.tev_stages[{}].output_color = " + "float4(0, 0, 0, 0);\n", + i); + } + + // Actual data will be filled out in the tev stage code, just set the + // stage count for now + out->Write("\tcustom_data.tev_stage_count = num_stages;\n"); +} +} // namespace PixelShaderUid GetPixelShaderUid() { PixelShaderUid out; @@ -56,7 +307,8 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos } ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, - const pixel_ubershader_uid_data* uid_data) + const pixel_ubershader_uid_data* uid_data, + const CustomPixelShaderContents& custom_details) { const bool per_pixel_lighting = host_config.per_pixel_lighting; const bool msaa = host_config.msaa; @@ -76,6 +328,12 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("// {}\n", *uid_data); WriteBitfieldExtractHeader(out, api_type, host_config); WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box); + WriteCustomShaderStructDef(&out, numTexgen); + for (std::size_t i = 0; i < custom_details.shaders.size(); i++) + { + const auto& shader_details = custom_details.shaders[i]; + out.Write(fmt::runtime(shader_details.custom_shader), i); + } if (per_pixel_lighting) WriteLightingFunction(out); @@ -228,6 +486,68 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, } out.Write("}}\n\n"); + + out.Write("uint selectTexCoordIndex(uint texmap)"); + out.Write("{{\n"); + + if (api_type == APIType::D3D) + { + out.Write(" switch (texmap) {{\n"); + for (u32 i = 0; i < numTexgen; i++) + { + out.Write(" case {}u:\n" + " return 
{};\n", + i, i); + } + out.Write(" default:\n" + " return 0;\n" + " }}\n"); + } + else + { + out.Write(" if (texmap >= {}u) {{\n", numTexgen); + out.Write(" return 0;\n" + " }}\n"); + if (numTexgen > 4) + out.Write(" if (texmap < 4u) {{\n"); + if (numTexgen > 2) + out.Write(" if (texmap < 2u) {{\n"); + if (numTexgen > 1) + out.Write(" return (texmap == 0u) ? 0 : 1;\n"); + else + out.Write(" return 0;\n"); + if (numTexgen > 2) + { + out.Write(" }} else {{\n"); // >= 2 < min(4, numTexgen) + if (numTexgen > 3) + out.Write(" return (texmap == 2u) ? 2 : 3;\n"); + else + out.Write(" return 2;\n"); + out.Write(" }}\n"); + } + if (numTexgen > 4) + { + out.Write(" }} else {{\n"); // >= 4 < min(8, numTexgen) + if (numTexgen > 6) + out.Write(" if (texmap < 6u) {{\n"); + if (numTexgen > 5) + out.Write(" return (texmap == 4u) ? 4 : 5;\n"); + else + out.Write(" return 4;\n"); + if (numTexgen > 6) + { + out.Write(" }} else {{\n"); // >= 6 < min(8, numTexgen) + if (numTexgen > 7) + out.Write(" return (texmap == 6u) ? 6 : 7;\n"); + else + out.Write(" return 6;\n"); + out.Write(" }}\n"); + } + out.Write(" }}\n"); + } + } + + out.Write("}}\n\n"); } // ===================== @@ -316,43 +636,43 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, // TEV's Special Lerp // ====================== const auto WriteTevLerp = [&out](std::string_view components) { - out.Write( - "// TEV's Linear Interpolate, plus bias, add/subtract and scale\n" - "int{0} tevLerp{0}(int{0} A, int{0} B, int{0} C, int{0} D, uint bias, bool op, " - "uint scale) {{\n" - " // Scale C from 0..255 to 0..256\n" - " C += C >> 7;\n" - "\n" - " // Add bias to D\n" - " if (bias == 1u) D += 128;\n" - " else if (bias == 2u) D -= 128;\n" - "\n" - " int{0} lerp = (A << 8) + (B - A)*C;\n" - " if (scale != 3u) {{\n" - " lerp = lerp << scale;\n" - " D = D << scale;\n" - " }}\n" - "\n" - " // TODO: Is this rounding bias still added when the scale is divide by 2? 
Currently we " - "do not apply it.\n" - " if (scale != 3u)\n" - " lerp = lerp + (op ? 127 : 128);\n" - "\n" - " int{0} result = lerp >> 8;\n" - "\n" - " // Add/Subtract D\n" - " if (op) // Subtract\n" - " result = D - result;\n" - " else // Add\n" - " result = D + result;\n" - "\n" - " // Most of the Scale was moved inside the lerp for improved precision\n" - " // But we still do the divide by 2 here\n" - " if (scale == 3u)\n" - " result = result >> 1;\n" - " return result;\n" - "}}\n\n", - components); + out.Write("// TEV's Linear Interpolate, plus bias, add/subtract and scale\n" + "int{0} tevLerp{0}(int{0} A, int{0} B, int{0} C, int{0} D, uint bias, bool op, " + "uint scale) {{\n" + " // Scale C from 0..255 to 0..256\n" + " C += C >> 7;\n" + "\n" + " // Add bias to D\n" + " if (bias == 1u) D += 128;\n" + " else if (bias == 2u) D -= 128;\n" + "\n" + " int{0} lerp = (A << 8) + (B - A)*C;\n" + " if (scale != 3u) {{\n" + " lerp = lerp << scale;\n" + " D = D << scale;\n" + " }}\n" + "\n" + " // TODO: Is this rounding bias still added when the scale is divide by 2? " + "Currently we " + "do not apply it.\n" + " if (scale != 3u)\n" + " lerp = lerp + (op ? 
127 : 128);\n" + "\n" + " int{0} result = lerp >> 8;\n" + "\n" + " // Add/Subtract D\n" + " if (op) // Subtract\n" + " result = D - result;\n" + " else // Add\n" + " result = D + result;\n" + "\n" + " // Most of the Scale was moved inside the lerp for improved precision\n" + " // But we still do the divide by 2 here\n" + " if (scale == 3u)\n" + " result = result >> 1;\n" + " return result;\n" + "}}\n\n", + components); }; WriteTevLerp(""); // int WriteTevLerp("3"); // int3 @@ -437,6 +757,25 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "return int3(0, 0, 0);", // ZERO }; + static constexpr Common::EnumMap tev_c_input_type{ + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", + }; + static constexpr Common::EnumMap tev_a_input_table{ "return s.Reg[0].a;", // APREV, "return s.Reg[1].a;", // A0, @@ -448,6 +787,17 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "return 0;", // ZERO }; + static constexpr Common::EnumMap tev_a_input_type{ + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_PREV;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return 
CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_COLOR;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_TEX;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_RAS;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_KONST;", + "return CUSTOM_SHADER_TEV_STAGE_INPUT_TYPE_NUMERIC;", + }; + static constexpr Common::EnumMap tev_regs_lookup_table{ "return s.Reg[0];", "return s.Reg[1];", @@ -489,6 +839,16 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("}}\n" "\n"); + out.Write("// Helper function for Custom Shader Input Type\n" + "uint getColorInputType(uint index) {{\n"); + WriteSwitch(out, api_type, "index", tev_c_input_type, 2, false); + out.Write("}}\n" + "\n" + "uint getAlphaInputType(uint index) {{\n"); + WriteSwitch(out, api_type, "index", tev_a_input_type, 2, false); + out.Write("}}\n" + "\n"); + // Since the fixed-point texture coodinate variables aren't global, we need to pass // them to the select function. This applies to all backends. if (numTexgen > 0) @@ -505,6 +865,17 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("void main()\n{{\n"); out.Write(" float4 rawpos = gl_FragCoord;\n"); + out.Write(" uint num_stages = {};\n\n", + BitfieldExtract<&GenMode::numtevstages>("bpmem_genmode")); + + bool has_custom_shader_details = false; // set only when some entry carries shader source + if (std::any_of(custom_details.shaders.begin(), custom_details.shaders.end(), + [](const auto& ps) { return !ps.custom_shader.empty(); })) + { + WriteCustomShaderStructImpl(&out, numTexgen, per_pixel_lighting); + has_custom_shader_details = true; + } + if (use_framebuffer_fetch) { // Store off a copy of the initial framebuffer value. 
@@ -563,9 +934,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " // o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);\n"); } - out.Write(" uint num_stages = {};\n\n", - BitfieldExtract<&GenMode::numtevstages>("bpmem_genmode")); - out.Write(" // Main tev loop\n"); out.Write(" for(uint stage = 0u; stage <= num_stages; stage++)\n" @@ -618,9 +986,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, // indirect texture stage is enabled). If the matrix is off, the result doesn't matter; if the // indirect texture stage is disabled, the result is undefined (and produces a glitchy pattern // on hardware, different from this). - // For the undefined case, we just skip applying the indirect operation, which is close enough. - // Viewtiful Joe hits the undefined case (bug 12525). - // Wrapping and add to previous still apply in this case (and when the stage is disabled). + // For the undefined case, we just skip applying the indirect operation, which is close + // enough. Viewtiful Joe hits the undefined case (bug 12525). Wrapping and add to previous + // still apply in this case (and when the stage is disabled). out.Write(" if (bpmem_iref(bt) != 0u) {{\n"); out.Write(" int3 indcoord;\n"); LookupIndirectTexture("indcoord", "bt"); @@ -826,7 +1194,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " alpha_B = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_b) & 255;\n" " }};\n" " int alpha_C = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_c) & 255;\n" - " int alpha_D = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_d); // 10 bits " + " int alpha_D = selectAlphaInput(s, ss, {0}colors_0, {0}colors_1, alpha_d); // 10 " + "bits " "+ sign\n" "\n", // TODO: do we need to sign extend? 
color_input_prefix); @@ -857,9 +1226,81 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "\n" " // Write result to the correct input register of the next stage\n"); WriteSwitch(out, api_type, "alpha_dest", tev_a_set_table, 6, true); - out.Write(" }}\n" - " }} // Main TEV loop\n" - "\n"); + if (has_custom_shader_details) + { + for (u32 stage_index = 0; stage_index < 16; stage_index++) + { + out.Write("\tif (stage == {}u) {{\n", stage_index); + // Color input (each operand's type comes from its own selector: a, b, c, d) + out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].value = color_A / float3(255.0, " + "255.0, 255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_color[0].input_type = " + "getColorInputType(color_a);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].value = color_B / float3(255.0, " + "255.0, 255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_color[1].input_type = " + "getColorInputType(color_b);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].value = color_C / float3(255.0, " + "255.0, 255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_color[2].input_type = " + "getColorInputType(color_c);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].value = color_D / float3(255.0, " + "255.0, 255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_color[3].input_type = " + "getColorInputType(color_d);\n", + stage_index); + + // Alpha input + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].value = alpha_A / float(255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[0].input_type = " + "getAlphaInputType(alpha_a);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].value = alpha_B / float(255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[1].input_type = " + "getAlphaInputType(alpha_b);\n", + 
stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].value = alpha_C / float(255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[2].input_type = " + "getAlphaInputType(alpha_c);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].value = alpha_D / float(255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].input_alpha[3].input_type = " + "getAlphaInputType(alpha_d);\n", + stage_index); + + if (numTexgen != 0) + { + // Texmap + out.Write("\t\tif (texture_enabled) {{\n"); + out.Write("\t\t\tuint sampler_num = {};\n", + BitfieldExtract<&TwoTevStageOrders::texmap_even>("ss.order")); + out.Write("\t\tcustom_data.tev_stages[{}].texmap = sampler_num;\n", stage_index); + out.Write("\t\t}}\n"); + } + + // Output + out.Write("\t\tcustom_data.tev_stages[{}].output_color.rgb = color / float3(255.0, 255.0, " + "255.0);\n", + stage_index); + out.Write("\t\tcustom_data.tev_stages[{}].output_color.a = alpha / float(255.0);\n", + stage_index); + out.Write("\t}}\n"); + } + } + out.Write(" }}\n"); + out.Write(" }} // Main TEV loop\n"); + out.Write("\n"); // Select the output color and alpha registers from the last stage. out.Write(" int4 TevResult;\n"); @@ -942,8 +1383,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, { // Instead of using discard, fetch the framebuffer's color value and use it as the output // for this fragment. - out.Write( - " #define discard_fragment {{ real_ocol0 = float4(initial_ocol0.xyz, 1.0); return; }}\n"); + out.Write(" #define discard_fragment {{ real_ocol0 = float4(initial_ocol0.xyz, 1.0); " + "return; }}\n"); } else { @@ -1109,8 +1550,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " }}\n"); } - // Some backends require that the shader outputs be uint when writing to a uint render target for - // logic op. 
+ // Some backends require that the shader outputs be uint when writing to a uint render target + // for logic op. if (uid_data->uint_output) { out.Write(" if (bpmem_rgba6_format)\n" @@ -1142,6 +1583,19 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, } } + for (std::size_t i = 0; i < custom_details.shaders.size(); i++) + { + const auto& shader_details = custom_details.shaders[i]; + + if (!shader_details.custom_shader.empty()) + { + out.Write("\t{{\n"); + out.Write("\t\tcustom_data.final_color = ocol0;\n"); + out.Write("\t\tocol0.xyz = {}_{}(custom_data).xyz;\n", CUSTOM_PIXELSHADER_COLOR_FUNC, i); + out.Write("\t}}\n\n"); + } + } + if (bounding_box) { out.Write(" if (bpmem_bounding_box) {{\n" @@ -1209,13 +1663,13 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, WriteSwitch(out, api_type, "blend_dst_factor", blendDstFactor, 4, true); WriteSwitch(out, api_type, "blend_dst_factor_alpha", blendDstFactorAlpha, 4, true); - out.Write( - " float4 blend_result;\n" - " if (blend_subtract)\n" - " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n" - " else\n" - " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * " - "blend_src.rgb;\n"); + out.Write(" float4 blend_result;\n" + " if (blend_subtract)\n" + " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * " + "blend_src.rgb;\n" + " else\n" + " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * " + "blend_src.rgb;\n"); out.Write(" if (blend_subtract_alpha)\n" " blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n" diff --git a/Source/Core/VideoCommon/UberShaderPixel.h b/Source/Core/VideoCommon/UberShaderPixel.h index aa3d6c7625..80560688ca 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.h +++ b/Source/Core/VideoCommon/UberShaderPixel.h @@ -29,7 +29,8 @@ using PixelShaderUid = ShaderUid; PixelShaderUid GetPixelShaderUid(); ShaderCode 
GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, - const pixel_ubershader_uid_data* uid_data); + const pixel_ubershader_uid_data* uid_data, + const CustomPixelShaderContents& custom_details); void EnumeratePixelShaderUids(const std::function& callback); void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& host_config,