From ac05d1a593cea074b66a589d2698334d31cb0ca9 Mon Sep 17 00:00:00 2001
From: Jonathan Hamilton <jtrhamilton@gmail.com>
Date: Mon, 25 Jun 2018 22:01:37 -0700
Subject: [PATCH 1/3] Do all arbitrary mipmap detection in integer space

This no longer converts from sRGB to linear for the reference mip
downsample - even if the original mipmap creation tool used an sRGB
colorspace (which isn't really guaranteed, and may even change per
game), this is a "fast" heuristic that's only an estimate anyway.

The average diff is also now stored in a u64, avoiding floating point
calculations in the per-pixel hot loop.

This should speed up the detection significantly, hopefully fixing
jank when loading in new textures.
---
 Source/Core/VideoCommon/TextureCacheBase.cpp | 51 ++++++++++++++------
 1 file changed, 35 insertions(+), 16 deletions(-)
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
index 2ea772890a..283f9c597c 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -486,6 +486,7 @@ class ArbitraryMipmapDetector
 {
 private:
   using PixelRGBAf = std::array<float, 4>;
+  using PixelRGBAu8 = std::array<u8, 4>;
 
 public:
   explicit ArbitraryMipmapDetector() = default;
@@ -519,6 +520,12 @@ public:
       const auto& level = levels[i];
       const auto& mip = levels[i + 1];
 
+      u64 level_pixel_count = level.shape.width;
+      level_pixel_count *= level.shape.height;
+
+      // AverageDiff stores the difference sum in a u64, so make sure we can't overflow
+      ASSERT(level_pixel_count < (std::numeric_limits<u64>::max() / (255 * 255 * 4)));
+
       // Manually downsample the past downsample with a simple box blur
       // This is not necessarily close to whatever the original artists used, however
       // It should still be closer than a thing that's not a downscale at all
@@ -568,6 +575,12 @@ private:
       return {{SRGBToLinear(p[0]), SRGBToLinear(p[1]), SRGBToLinear(p[2]), SRGBToLinear(p[3])}};
     }
 
+    static PixelRGBAu8 SampleLinear(const u8* src, const Shape& src_shape, u32 x, u32 y)
+    {
+      const auto* p = src + (x + y * src_shape.row_length) * 4;
+      return {{p[0], p[1], p[2], p[3]}};
+    }
+
     // Puts a downsampled image in dst. dst must be at least width*height*4
     static void Downsample(const u8* src, const Shape& src_shape, u8* dst, const Shape& dst_shape)
     {
@@ -577,29 +590,32 @@ private:
         {
           auto x = j * 2;
           auto y = i * 2;
-          const std::array<PixelRGBAf, 4> samples{{
-              Sample(src, src_shape, x, y),
-              Sample(src, src_shape, x + 1, y),
-              Sample(src, src_shape, x, y + 1),
-              Sample(src, src_shape, x + 1, y + 1),
+          const std::array<PixelRGBAu8, 4> samples{{
+              SampleLinear(src, src_shape, x, y),
+              SampleLinear(src, src_shape, x + 1, y),
+              SampleLinear(src, src_shape, x, y + 1),
+              SampleLinear(src, src_shape, x + 1, y + 1),
           }};
 
           auto* dst_pixel = dst + (j + i * dst_shape.row_length) * 4;
-          dst_pixel[0] =
-              LinearToSRGB((samples[0][0] + samples[1][0] + samples[2][0] + samples[3][0]) * 0.25f);
-          dst_pixel[1] =
-              LinearToSRGB((samples[0][1] + samples[1][1] + samples[2][1] + samples[3][1]) * 0.25f);
-          dst_pixel[2] =
-              LinearToSRGB((samples[0][2] + samples[1][2] + samples[2][2] + samples[3][2]) * 0.25f);
-          dst_pixel[3] =
-              LinearToSRGB((samples[0][3] + samples[1][3] + samples[2][3] + samples[3][3]) * 0.25f);
+          for (int channel = 0; channel < 4; channel++)
+          {
+            uint32_t channel_value = samples[0][channel] + samples[1][channel] +
+                                     samples[2][channel] + samples[3][channel];
+            dst_pixel[channel] = (channel_value + 2) / 4;
+          }
         }
       }
     }
 
     float AverageDiff(const u8* other) const
     {
-      float average_diff = 0.f;
+      // As textures are stored in (at most) 8 bit precision, each channel can
+      // have a max diff of (2^8)^2, multiply by 4 channels = 2^18 per pixel.
+      // That means to overflow, we must have a texture with more than 2^46
+      // pixels - which is way beyond anything the original hardware could do,
+      // and likely a sane assumption going forward for some significant time.
+      u64 current_diff_sum = 0;
       const auto* ptr1 = pixels;
       const auto* ptr2 = other;
       for (u32 i = 0; i < shape.height; ++i)
@@ -615,13 +631,16 @@ private:
             const int diff_squared = diff * diff;
             pixel_diff += diff_squared;
           }
-          average_diff += pixel_diff;
+          current_diff_sum += pixel_diff;
         }
         ptr1 += shape.row_length;
         ptr2 += shape.row_length;
       }
+      // calculate the RMSE (root of the mean squared difference over all channels
+      // of all pixels), divide by 2.56 to make it a percent (IE scale to 0..100)
 
-      return average_diff / (shape.width * shape.height * 4) / 2.56f;
+      return std::sqrt(static_cast<float>(current_diff_sum) / (shape.width * shape.height * 4)) /
+             2.56f;
     }
   };
   std::vector<Level> levels;

From 431b9e76be390afe520464b81861aeb27955ddbd Mon Sep 17 00:00:00 2001
From: Jonathan Hamilton <jtrhamilton@gmail.com>
Date: Thu, 28 Jun 2018 02:50:52 +0000
Subject: [PATCH 2/3] Set arbitrary mipmap detection threshold default to 14

Now that the detection heuristic has changed, the old value is no longer
valid.

Some example thresholds for known mipmap effects that should trigger:

SMG's lava has a minimum difference of ~17.8, SMG2's clouds have a
minimum difference of ~14.8, and Wind Waker's foam has a minimum
difference of ~15.

Non-triggering examples were tested and all had a calculated difference
lower than 3.

So a value of 14 should lean towards false negatives rather than false
positives, but this is clearly incomplete testing and may require
further tweaks later.
---
 Source/Core/Core/Config/GraphicsSettings.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp
index 98a5c13bcf..3a2a40a6d7 100644
--- a/Source/Core/Core/Config/GraphicsSettings.cpp
+++ b/Source/Core/Core/Config/GraphicsSettings.cpp
@@ -110,7 +110,7 @@ const ConfigInfo<bool> GFX_ENHANCE_DISABLE_COPY_FILTER{
 const ConfigInfo<bool> GFX_ENHANCE_ARBITRARY_MIPMAP_DETECTION{
     {System::GFX, "Enhancements", "ArbitraryMipmapDetection"}, true};
 const ConfigInfo<float> GFX_ENHANCE_ARBITRARY_MIPMAP_DETECTION_THRESHOLD{
-    {System::GFX, "Enhancements", "ArbitraryMipmapDetectionThreshold"}, 4.5f};
+    {System::GFX, "Enhancements", "ArbitraryMipmapDetectionThreshold"}, 14.0f};
 
 // Graphics.Stereoscopy
 

From b54803a074ad8ee02ee5c7073db237acb705bf7b Mon Sep 17 00:00:00 2001
From: Jonathan Hamilton <jtrhamilton@gmail.com>
Date: Mon, 2 Jul 2018 09:46:16 -0700
Subject: [PATCH 3/3] Remove unused sRGB conversion functions

Now that the arbitrary mipmap reference downsampling averages the stored 8-bit
values directly (treating them as linear), these are no longer used.
---
 Source/Core/VideoCommon/TextureCacheBase.cpp | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
index 283f9c597c..4a8807954e 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -544,19 +544,6 @@ public:
   }
 
 private:
-  static float SRGBToLinear(u8 srgb_byte)
-  {
-    auto srgb_float = static_cast<float>(srgb_byte) / 256.f;
-    // approximations found on
-    // http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
-    return srgb_float * (srgb_float * (srgb_float * 0.305306011f + 0.682171111f) + 0.012522878f);
-  }
-
-  static u8 LinearToSRGB(float linear)
-  {
-    return static_cast<u8>(std::max(1.055f * std::pow(linear, 0.416666667f) - 0.055f, 0.f) * 256.f);
-  }
-
   struct Shape
   {
     u32 width;
@@ -569,12 +556,6 @@ private:
     Shape shape;
     const u8* pixels;
 
-    static PixelRGBAf Sample(const u8* src, const Shape& src_shape, u32 x, u32 y)
-    {
-      const auto* p = src + (x + y * src_shape.row_length) * 4;
-      return {{SRGBToLinear(p[0]), SRGBToLinear(p[1]), SRGBToLinear(p[2]), SRGBToLinear(p[3])}};
-    }
-
     static PixelRGBAu8 SampleLinear(const u8* src, const Shape& src_shape, u32 x, u32 y)
     {
       const auto* p = src + (x + y * src_shape.row_length) * 4;