EFB2RAM: Apply copy filter as a float coefficient after sampling

Using 8-bit integer math here led to precision loss for depth copies, which broke various effects in games, e.g. lens flare in MK:DD. It's unlikely the console implements this as a floating-point multiply (fixed-point perhaps), but since we have the float round trip in our EFB2RAM shaders anyway, it's not going to make things any worse. If we do rewrite our shaders to use integer math completely, then it might be worth switching this conversion back to integers. In that case, however, the range of the values (format) should be known, or we should expand all values out to 24 bits first.
2025-07-21 05:09:34 -06:00 · 2018-05-22 12:14:48 +10:00
parent 59be5da24c
commit f74dbc794c
10 changed files with 38 additions and 35 deletions
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@ -67,7 +67,7 @@ static void WriteHeader(char*& p, APIType ApiType)
    WRITE(p, "uniform float y_scale;\n");
    WRITE(p, "uniform float gamma_rcp;\n");
    WRITE(p, "uniform float2 clamp_tb;\n");
-    WRITE(p, "uniform int3 filter_coefficients;\n");
+    WRITE(p, "uniform float3 filter_coefficients;\n");
    WRITE(p, "#define samp0 samp9\n");
    WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -79,7 +79,7 @@ static void WriteHeader(char*& p, APIType ApiType)
    WRITE(p, "  float y_scale;\n");
    WRITE(p, "  float gamma_rcp;\n");
    WRITE(p, "  float2 clamp_tb;\n");
-    WRITE(p, "  int3 filter_coefficients;\n");
+    WRITE(p, "  float3 filter_coefficients;\n");
    WRITE(p, "};\n");
    WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -91,7 +91,7 @@ static void WriteHeader(char*& p, APIType ApiType)
    WRITE(p, "  float y_scale;\n");
    WRITE(p, "  float gamma_rcp;\n");
    WRITE(p, "  float2 clamp_tb;\n");
-    WRITE(p, "  int3 filter_coefficients;\n");
+    WRITE(p, "  float3 filter_coefficients;\n");
    WRITE(p, "};\n");
    WRITE(p, "sampler samp0 : register(s0);\n");
    WRITE(p, "Texture2DArray Tex0 : register(t0);\n");
@ -191,21 +191,18 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
    WRITE(p, "  float4 next_row = ");
    WriteSampleOp(1);
    WRITE(p, ";\n");
-    WRITE(
-        p,
-        "  float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
-        "                             int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
-        "                             int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
-        "                            int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
-    WRITE(p, "  return float4(col, current_row.a);\n");
+    WRITE(p, "  return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
+             "                      current_row.rgb * filter_coefficients[1] +\n"
+             "                      next_row.rgb * filter_coefficients[2], \n"
+             "                    float3(1, 1, 1)), current_row.a);\n");
  }
  else
  {
    WRITE(p, "  float4 current_row = ");
    WriteSampleOp(0);
    WRITE(p, ";\n");
-    WRITE(p, "  return float4(clamp(int3(current_row.rgb * 255.0) * filter_coefficients[1], "
-             "int3(0, 0, 0), int3(255, 255, 255)), current_row.a);\n");
+    WRITE(p, "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
+             "              current_row.a);\n");
  }
  WRITE(p, "}\n");
 }
@ -1422,4 +1419,4 @@ std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_form
  return ss.str();
 }

-}  // namespace
+}  // namespace TextureConversionShaderTiled