Video: Clearly separate Texture and EFB Copy formats

Improve bookkeeping around formats, hopefully making the code less confusing.
- Rename TlutFormat -> TLUTFormat to follow conventions.
- Use enum classes to prevent using a Texture format where an EFB Copy format is expected, or vice versa.
- Use common EFBCopyFormat names regardless of depth and YUV configurations.
2025-07-25 15:19:42 -06:00 · 2017-07-30 12:45:55 -07:00
parent 9649494f67
commit c3a57bbad5
27 changed files with 1275 additions and 1319 deletions
--- a/Source/Core/VideoCommon/TextureDecoder_x64.cpp
+++ b/Source/Core/VideoCommon/TextureDecoder_x64.cpp
@ -212,12 +212,13 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch)
 // free to make the assumption that addresses are multiples of 16 in the aligned case.
 // TODO: complete SSE2 optimization of less often used texture formats.
 // TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
-static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height, int texformat,
-                                     const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
+static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height,
+                                     TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
+                                     int Wsteps4, int Wsteps8)
 {
  switch (tlutfmt)
  {
-  case GX_TL_RGB5A3:
+  case TLUTFormat::RGB5A3:
  {
    for (int y = 0; y < height; y += 8)
      for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -226,7 +227,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
  }
  break;

-  case GX_TL_IA8:
+  case TLUTFormat::IA8:
  {
    for (int y = 0; y < height; y += 8)
      for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -235,7 +236,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
  }
  break;

-  case GX_TL_RGB565:
+  case TLUTFormat::RGB565:
  {
    for (int y = 0; y < height; y += 8)
      for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -251,8 +252,8 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei

 FUNCTION_TARGET_SSSE3
 static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, int height,
-                                           int texformat, const u8* tlut, TlutFormat tlutfmt,
-                                           int Wsteps4, int Wsteps8)
+                                           TextureFormat texformat, const u8* tlut,
+                                           TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
 {
  const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
  const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
@ -298,8 +299,9 @@ static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, i
  }
 }

-static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height, int texformat,
-                                     const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
+static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height,
+                                     TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
+                                     int Wsteps4, int Wsteps8)
 {
  const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
  const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
@ -390,8 +392,8 @@ static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int hei

 FUNCTION_TARGET_SSSE3
 static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, int height,
-                                           int texformat, const u8* tlut, TlutFormat tlutfmt,
-                                           int Wsteps4, int Wsteps8)
+                                           TextureFormat texformat, const u8* tlut,
+                                           TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
 {
  // xsacha optimized with SSSE3 intrinsics
  // Produces a ~10% speed improvement over SSE2 implementation
@ -419,8 +421,9 @@ static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, i
  }
 }

-static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height, int texformat,
-                                     const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
+static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height,
+                                     TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
+                                     int Wsteps4, int Wsteps8)
 {
  // JSD optimized with SSE2 intrinsics.
  // Produces an ~86% speed improvement over reference C implementation.
@ -518,12 +521,13 @@ static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int hei
  }
 }

-static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height, int texformat,
-                                     const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
+static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height,
+                                     TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
+                                     int Wsteps4, int Wsteps8)
 {
  switch (tlutfmt)
  {
-  case GX_TL_RGB5A3:
+  case TLUTFormat::RGB5A3:
  {
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -532,7 +536,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
  }
  break;

-  case GX_TL_IA8:
+  case TLUTFormat::IA8:
  {
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -541,7 +545,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
  }
  break;

-  case GX_TL_RGB565:
+  case TLUTFormat::RGB565:
  {
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -555,8 +559,9 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
  }
 }

-static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height, int texformat,
-                                      const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
+static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height,
+                                      TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
+                                      int Wsteps4, int Wsteps8)
 {
  for (int y = 0; y < height; y += 4)
  {
@ -572,8 +577,8 @@ static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int he

 FUNCTION_TARGET_SSSE3
 static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width, int height,
-                                            int texformat, const u8* tlut, TlutFormat tlutfmt,
-                                            int Wsteps4, int Wsteps8)
+                                            TextureFormat texformat, const u8* tlut,
+                                            TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
 {
  // xsacha optimized with SSSE3 intrinsics.
  // Produces an ~50% speed improvement over SSE2 implementation.
@ -595,8 +600,9 @@ static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width,
  }
 }

-static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height, int texformat,
-                                      const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
+static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height,
+                                      TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
+                                      int Wsteps4, int Wsteps8)
 {
  // JSD optimized with SSE2 intrinsics.
  // Produces an ~80% speed improvement over reference C implementation.
@ -656,12 +662,12 @@ static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int he
 }

 static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int height,
-                                        int texformat, const u8* tlut, TlutFormat tlutfmt,
+                                        TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
                                        int Wsteps4, int Wsteps8)
 {
  switch (tlutfmt)
  {
-  case GX_TL_RGB5A3:
+  case TLUTFormat::RGB5A3:
  {
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -670,7 +676,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
  }
  break;

-  case GX_TL_IA8:
+  case TLUTFormat::IA8:
  {
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -679,7 +685,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
  }
  break;

-  case GX_TL_RGB565:
+  case TLUTFormat::RGB565:
  {
    for (int y = 0; y < height; y += 4)
      for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -694,8 +700,8 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
 }

 static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int height,
-                                         int texformat, const u8* tlut, TlutFormat tlutfmt,
-                                         int Wsteps4, int Wsteps8)
+                                         TextureFormat texformat, const u8* tlut,
+                                         TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
 {
  // JSD optimized with SSE2 intrinsics.
  // Produces an ~78% speed improvement over reference C implementation.
@ -766,8 +772,8 @@ static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int

 FUNCTION_TARGET_SSSE3
 static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int width, int height,
-                                               int texformat, const u8* tlut, TlutFormat tlutfmt,
-                                               int Wsteps4, int Wsteps8)
+                                               TextureFormat texformat, const u8* tlut,
+                                               TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
 {
  const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
  const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
@ -872,8 +878,8 @@ static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int widt
 }

 static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int height,
-                                         int texformat, const u8* tlut, TlutFormat tlutfmt,
-                                         int Wsteps4, int Wsteps8)
+                                         TextureFormat texformat, const u8* tlut,
+                                         TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
 {
  const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
  const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
@ -993,8 +999,8 @@ static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int

 FUNCTION_TARGET_SSSE3
 static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width, int height,
-                                              int texformat, const u8* tlut, TlutFormat tlutfmt,
-                                              int Wsteps4, int Wsteps8)
+                                              TextureFormat texformat, const u8* tlut,
+                                              TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
 {
  // xsacha optimized with SSSE3 instrinsics
  // Produces a ~30% speed improvement over SSE2 implementation
@ -1027,7 +1033,7 @@ static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width
 }

 static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int height,
-                                        int texformat, const u8* tlut, TlutFormat tlutfmt,
+                                        TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
                                        int Wsteps4, int Wsteps8)
 {
  // JSD optimized with SSE2 intrinsics
@ -1148,7 +1154,7 @@ static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int
 }

 static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int height,
-                                       int texformat, const u8* tlut, TlutFormat tlutfmt,
+                                       TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
                                       int Wsteps4, int Wsteps8)
 {
  // The metroid games use this format almost exclusively.
@ -1403,19 +1409,19 @@ static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int h
  }
 }

-void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat,
-                            const u8* tlut, TlutFormat tlutfmt)
+void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
+                            const u8* tlut, TLUTFormat tlutfmt)
 {
  int Wsteps4 = (width + 3) / 4;
  int Wsteps8 = (width + 7) / 8;

  switch (texformat)
  {
-  case GX_TF_C4:
+  case TextureFormat::C4:
    TexDecoder_DecodeImpl_C4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
    break;

-  case GX_TF_I4:
+  case TextureFormat::I4:
    if (cpu_info.bSSSE3)
      TexDecoder_DecodeImpl_I4_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
                                     Wsteps8);
@ -1423,7 +1429,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
      TexDecoder_DecodeImpl_I4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
    break;

-  case GX_TF_I8:
+  case TextureFormat::I8:
    if (cpu_info.bSSSE3)
      TexDecoder_DecodeImpl_I8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
                                     Wsteps8);
@ -1431,15 +1437,15 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
      TexDecoder_DecodeImpl_I8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
    break;

-  case GX_TF_C8:
+  case TextureFormat::C8:
    TexDecoder_DecodeImpl_C8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
    break;

-  case GX_TF_IA4:
+  case TextureFormat::IA4:
    TexDecoder_DecodeImpl_IA4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
    break;

-  case GX_TF_IA8:
+  case TextureFormat::IA8:
    if (cpu_info.bSSSE3)
      TexDecoder_DecodeImpl_IA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
                                      Wsteps8);
@ -1448,17 +1454,17 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
                                Wsteps8);
    break;

-  case GX_TF_C14X2:
+  case TextureFormat::C14X2:
    TexDecoder_DecodeImpl_C14X2(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
                                Wsteps8);
    break;

-  case GX_TF_RGB565:
+  case TextureFormat::RGB565:
    TexDecoder_DecodeImpl_RGB565(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
                                 Wsteps8);
    break;

-  case GX_TF_RGB5A3:
+  case TextureFormat::RGB5A3:
    if (cpu_info.bSSSE3)
      TexDecoder_DecodeImpl_RGB5A3_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
                                         Wsteps8);
@ -1467,7 +1473,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
                                   Wsteps8);
    break;

-  case GX_TF_RGBA8:
+  case TextureFormat::RGBA8:
    if (cpu_info.bSSSE3)
      TexDecoder_DecodeImpl_RGBA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
                                        Wsteps8);
@ -1476,12 +1482,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
                                  Wsteps8);
    break;

-  case GX_TF_CMPR:
+  case TextureFormat::CMPR:
    TexDecoder_DecodeImpl_CMPR(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
    break;

  default:
-    PanicAlert("Unhandled texture format %d", texformat);
+    PanicAlert("Invalid Texture Format (0x%X)! (_TexDecoder_DecodeImpl)",
+               static_cast<int>(texformat));
    break;
  }
 }