VideoCommon: Add texture decoding shader generators

2025-07-24 06:39:46 -06:00 · 2016-11-27 18:14:58 +10:00
parent 82fd984f3e
commit 6ffc16d1b0
2 changed files with 586 additions and 1 deletions
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@ -2,9 +2,13 @@
 // Licensed under GPLv2+
 // Refer to the license.txt file included.

+#include <array>
 #include <cmath>
 #include <cstdio>
+#include <map>
+#include <sstream>

+#include "Common/CommonFuncs.h"
 #include "Common/CommonTypes.h"
 #include "Common/MathUtil.h"
 #include "Common/MsgHandler.h"
@ -720,4 +724,546 @@ const char* GenerateEncodingShader(u32 format, APIType ApiType)
  return text;
 }

+// NOTE: In these uniforms, a row refers to a row of blocks, not texels.
+static const char decoding_shader_header[] = R"(
+#ifdef VULKAN
+
+layout(std140, push_constant) uniform PushConstants {
+  uvec2 dst_size;
+  uvec2 src_size;
+  uint src_offset;
+  uint src_row_stride;
+  uint palette_offset;
+} push_constants;
+#define u_dst_size (push_constants.dst_size)
+#define u_src_size (push_constants.src_size)
+#define u_src_offset (push_constants.src_offset)
+#define u_src_row_stride (push_constants.src_row_stride)
+#define u_palette_offset (push_constants.palette_offset)
+
+TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
+TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
+
+IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
+
+#else
+
+uniform uvec2 u_dst_size;
+uniform uvec2 u_src_size;
+uniform uint u_src_offset;
+uniform uint u_src_row_stride;
+uniform uint u_palette_offset;
+
+SAMPLER_BINDING(9) uniform usamplerBuffer s_input_buffer;
+SAMPLER_BINDING(10) uniform usamplerBuffer s_palette_buffer;
+
+layout(rgba8, binding = 0) uniform writeonly image2DArray output_image;
+
+#endif
+
+uint Swap16(uint v)
+{
+  // Convert BE to LE.
+  return ((v >> 8) | (v << 8)) & 0xFFFFu;
+}
+
+uint Convert3To8(uint v)
+{
+  // Swizzle bits: 00000123 -> 12312312
+  return (v << 5) | (v << 2) | (v >> 1);
+}
+uint Convert4To8(uint v)
+{
+  // Swizzle bits: 00001234 -> 12341234
+  return (v << 4) | v;
+}
+uint Convert5To8(uint v)
+{
+  // Swizzle bits: 00012345 -> 12345123
+  return (v << 3) | (v >> 2);
+}
+uint Convert6To8(uint v)
+{
+  // Swizzle bits: 00123456 -> 12345612
+  return (v << 2) | (v >> 4);
+}
+
+uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords)
+{
+  uvec2 block = coords / block_size;
+  uvec2 offset = coords % block_size;
+  uint buffer_pos = u_src_offset;
+  buffer_pos += block.y * u_src_row_stride;
+  buffer_pos += block.x * (block_size.x * block_size.y);
+  buffer_pos += offset.y * block_size.x;
+  buffer_pos += offset.x;
+  return buffer_pos;
+}
+
+uvec4 GetPaletteColor(uint index)
+{
+  // Fetch and swap BE to LE.
+  uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
+
+  uvec4 color;
+#if defined(PALETTE_FORMAT_IA8)
+  uint a = bitfieldExtract(val, 8, 8);
+  uint i = bitfieldExtract(val, 0, 8);
+  color = uvec4(i, i, i, a);
+#elif defined(PALETTE_FORMAT_RGB565)
+  color.x = Convert5To8(bitfieldExtract(val, 11, 5));
+  color.y = Convert6To8(bitfieldExtract(val, 5, 6));
+  color.z = Convert5To8(bitfieldExtract(val, 0, 5));
+  color.a = 255u;
+
+#elif defined(PALETTE_FORMAT_RGB5A3)
+  if ((val & 0x8000u) != 0u)
+  {
+    color.x = Convert5To8(bitfieldExtract(val, 10, 5));
+    color.y = Convert5To8(bitfieldExtract(val, 5, 5));
+    color.z = Convert5To8(bitfieldExtract(val, 0, 5));
+    color.a = 255u;
+  }
+  else
+  {
+    color.a = Convert3To8(bitfieldExtract(val, 12, 3));
+    color.r = Convert4To8(bitfieldExtract(val, 8, 4));
+    color.g = Convert4To8(bitfieldExtract(val, 4, 4));
+    color.b = Convert4To8(bitfieldExtract(val, 0, 4));
+  }
+#else
+  // Not used.
+  color = uvec4(0, 0, 0, 0);
+#endif
+
+  return color;
+}
+
+vec4 GetPaletteColorNormalized(uint index)
+{
+  uvec4 color = GetPaletteColor(index);
+  return vec4(color) / 255.0;
+}
+
+)";
+
+static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
+    {GX_TF_I4,
+     {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 8x8 blocks, 4 bits per pixel
+        // We need to do the tiling manually here because the texel size is smaller than
+        // the size of the buffer elements.
+        uint2 block = coords.xy / 8u;
+        uint2 offset = coords.xy % 8u;
+        uint buffer_pos = u_src_offset;
+        buffer_pos += block.y * u_src_row_stride;
+        buffer_pos += block.x * 32u;
+        buffer_pos += offset.y * 4u;
+        buffer_pos += offset.x / 2u;
+
+        // Select high nibble for odd texels, low for even.
+        uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
+        uint i;
+        if ((coords.x & 1u) == 0u)
+          i = Convert4To8((val >> 4));
+        else
+          i = Convert4To8((val & 0x0Fu));
+
+        uvec4 color = uvec4(i, i, i, i);
+        vec4 norm_color = vec4(color) / 255.0;
+
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+
+      )"}},
+    {GX_TF_IA4,
+     {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 8x4 blocks, 8 bits per pixel
+        uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
+        uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
+        uint i = Convert4To8((val & 0x0Fu));
+        uint a = Convert4To8((val >> 4));
+        uvec4 color = uvec4(i, i, i, a);
+        vec4 norm_color = vec4(color) / 255.0;
+
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+      )"}},
+    {GX_TF_I8,
+     {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 8x4 blocks, 8 bits per pixel
+        uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
+        uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
+        uvec4 color = uvec4(i, i, i, i);
+        vec4 norm_color = vec4(color) / 255.0;
+
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+      )"}},
+    {GX_TF_IA8,
+     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 4x4 blocks, 16 bits per pixel
+        uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
+        uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
+        uint a = (val & 0xFFu);
+        uint i = (val >> 8);
+        uvec4 color = uvec4(i, i, i, a);
+        vec4 norm_color = vec4(color) / 255.0;
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+      )"}},
+    {GX_TF_RGB565,
+     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 4x4 blocks
+        uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
+        uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
+
+        uvec4 color;
+        color.x = Convert5To8(bitfieldExtract(val, 11, 5));
+        color.y = Convert6To8(bitfieldExtract(val, 5, 6));
+        color.z = Convert5To8(bitfieldExtract(val, 0, 5));
+        color.a = 255u;
+
+        vec4 norm_color = vec4(color) / 255.0;
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+
+      )"}},
+    {GX_TF_RGB5A3,
+     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 4x4 blocks
+        uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
+        uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
+
+        uvec4 color;
+        if ((val & 0x8000u) != 0u)
+        {
+          color.x = Convert5To8(bitfieldExtract(val, 10, 5));
+          color.y = Convert5To8(bitfieldExtract(val, 5, 5));
+          color.z = Convert5To8(bitfieldExtract(val, 0, 5));
+          color.a = 255u;
+        }
+        else
+        {
+          color.a = Convert3To8(bitfieldExtract(val, 12, 3));
+          color.r = Convert4To8(bitfieldExtract(val, 8, 4));
+          color.g = Convert4To8(bitfieldExtract(val, 4, 4));
+          color.b = Convert4To8(bitfieldExtract(val, 0, 4));
+        }
+
+        vec4 norm_color = vec4(color) / 255.0;
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+
+      )"}},
+    {GX_TF_RGBA8,
+     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 4x4 blocks
+        // We can't use the normal calculation function, as these are packed as the AR channels
+        // for the entire block, then the GB channels afterwards.
+        uint2 block = coords.xy / 4u;
+        uint2 offset = coords.xy % 4u;
+        uint buffer_pos = u_src_offset;
+
+        // Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes.
+        buffer_pos += block.y * u_src_row_stride;
+        buffer_pos += block.x * 32u;
+        buffer_pos += offset.y * 4u;
+        buffer_pos += offset.x;
+
+        // The two GB channels follow after the block's AR channels.
+        uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
+        uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;
+
+        uvec4 color;
+        color.a = (val1 & 0xFFu);
+        color.r = (val1 >> 8);
+        color.g = (val2 & 0xFFu);
+        color.b = (val2 >> 8);
+
+        vec4 norm_color = vec4(color) / 255.0;
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+      )"}},
+    {GX_TF_CMPR,
+     {BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
+      R"(
+      // In the compute version of this decoder, we flatten the blocks to a one-dimension array.
+      // Each group is subdivided into 16, and the first thread in each group fetches the DXT data.
+      // All threads then calculate the possible colors for the block and write to the output image.
+
+      #define GROUP_SIZE 64u
+      #define BLOCK_SIZE_X 4u
+      #define BLOCK_SIZE_Y 4u
+      #define BLOCK_SIZE (BLOCK_SIZE_X * BLOCK_SIZE_Y)
+      #define BLOCKS_PER_GROUP (GROUP_SIZE / BLOCK_SIZE)
+
+      layout(local_size_x = GROUP_SIZE, local_size_y = 1) in;
+
+      shared uvec2 shared_temp[BLOCKS_PER_GROUP];
+
+      uint DXTBlend(uint v1, uint v2)
+      {
+        // 3/8 blend, which is close to 1/3
+        return ((v1 * 3u + v2 * 5u) >> 3);
+      }
+
+      void main()
+      {
+        uint local_thread_id = gl_LocalInvocationID.x;
+        uint block_in_group = local_thread_id / BLOCK_SIZE;
+        uint thread_in_block = local_thread_id % BLOCK_SIZE;
+        uint block_index = gl_WorkGroupID.x * BLOCKS_PER_GROUP + block_in_group;
+
+        // Annoyingly, we can't precalculate this as a uniform because the DXT block size differs
+        // from the block size of the overall texture (4 vs 8). We can however use a multiply and
+        // subtraction to avoid the modulo for calculating the block's X coordinate.
+        uint blocks_wide = u_src_size.x / BLOCK_SIZE_X;
+        uvec2 block_coords;
+        block_coords.y = block_index / blocks_wide;
+        block_coords.x = block_index - (block_coords.y * blocks_wide);
+
+        // Only the first thread for each block reads from the texel buffer.
+        if (thread_in_block == 0u)
+        {
+          // Calculate tiled block coordinates.
+          uvec2 tile_block_coords = block_coords / 2u;
+          uvec2 subtile_block_coords = block_coords % 2u;
+          uint buffer_pos = u_src_offset;
+          buffer_pos += tile_block_coords.y * u_src_row_stride;
+          buffer_pos += tile_block_coords.x * 4u;
+          buffer_pos += subtile_block_coords.y * 2u;
+          buffer_pos += subtile_block_coords.x;
+
+          // Read the entire DXT block to shared memory.
+          uvec2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
+          shared_temp[block_in_group] = raw_data;
+        }
+
+        // Ensure store is completed before the remaining threads in the block continue.
+        memoryBarrierShared();
+        barrier();
+
+        // Unpack colors and swap BE to LE.
+        uvec2 raw_data = shared_temp[block_in_group];
+        uint swapped = ((raw_data.x & 0xFF00FF00u) >> 8) | ((raw_data.x & 0x00FF00FFu) << 8);
+        uint c1 = swapped & 0xFFFFu;
+        uint c2 = swapped >> 16;
+
+        // Expand 5/6 bit channels to 8-bits per channel.
+        uint blue1 = Convert5To8(bitfieldExtract(c1, 0, 5));
+        uint blue2 = Convert5To8(bitfieldExtract(c2, 0, 5));
+        uint green1 = Convert6To8(bitfieldExtract(c1, 5, 6));
+        uint green2 = Convert6To8(bitfieldExtract(c2, 5, 6));
+        uint red1 = Convert5To8(bitfieldExtract(c1, 11, 5));
+        uint red2 = Convert5To8(bitfieldExtract(c2, 11, 5));
+
+        // Determine the four colors the block can use.
+        // It's quicker to just precalculate all four colors rather than branching on the index.
+        // NOTE: These must be masked with 0xFF. This is done at the normalization stage below.
+        uvec4 color0, color1, color2, color3;
+        color0 = uvec4(red1, green1, blue1, 255u);
+        color1 = uvec4(red2, green2, blue2, 255u);
+        if (c1 > c2)
+        {
+          color2 = uvec4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u);
+          color3 = uvec4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u);
+        }
+        else
+        {
+          color2 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u);
+          color3 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u);
+        }
+
+        // Calculate the texel coordinates that we will write to.
+        // The divides/modulo here should be turned into a shift/binary AND.
+        uint local_y = thread_in_block / BLOCK_SIZE_X;
+        uint local_x = thread_in_block % BLOCK_SIZE_X;
+        uint global_x = block_coords.x * BLOCK_SIZE_X + local_x;
+        uint global_y = block_coords.y * BLOCK_SIZE_Y + local_y;
+
+        // Use the coordinates within the block to shift the 32-bit value containing
+        // all 16 indices to a single 2-bit index.
+        uint index = bitfieldExtract(raw_data.y, int((local_y * 8u) + (6u - local_x * 2u)), 2);
+
+        // Select the un-normalized color from the precalculated color array.
+        // Using a switch statement here removes the need for dynamic indexing of an array.
+        uvec4 color;
+        switch (index)
+        {
+        case 0u:  color = color0;   break;
+        case 1u:  color = color1;   break;
+        case 2u:  color = color2;   break;
+        case 3u:  color = color3;   break;
+        default:  color = color0;   break;
+        }
+
+        // Normalize and write to the output image.
+        vec4 norm_color = vec4(color & 0xFFu) / 255.0;
+        imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color);
+      }
+      )"}},
+    {GX_TF_C4,
+     {BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C4)), 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 8x8 blocks, 4 bits per pixel
+        // We need to do the tiling manually here because the texel size is smaller than
+        // the size of the buffer elements.
+        uint2 block = coords.xy / 8u;
+        uint2 offset = coords.xy % 8u;
+        uint buffer_pos = u_src_offset;
+        buffer_pos += block.y * u_src_row_stride;
+        buffer_pos += block.x * 32u;
+        buffer_pos += offset.y * 4u;
+        buffer_pos += offset.x / 2u;
+
+        // Select high nibble for odd texels, low for even.
+        uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
+        uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
+        vec4 norm_color = GetPaletteColorNormalized(index);
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+
+      )"}},
+    {GX_TF_C8,
+     {BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C8)), 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 8x4 blocks, 8 bits per pixel
+        uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
+        uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
+        vec4 norm_color = GetPaletteColorNormalized(index);
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+      )"}},
+    {GX_TF_C14X2,
+     {BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C14X2)), 8, 8, false,
+      R"(
+      layout(local_size_x = 8, local_size_y = 8) in;
+
+      void main()
+      {
+        uvec2 coords = gl_GlobalInvocationID.xy;
+
+        // Tiled in 4x4 blocks, 16 bits per pixel
+        uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
+        uint index = texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
+        vec4 norm_color = GetPaletteColorNormalized(index);
+        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
+      }
+      )"}}};
+
+static const std::array<u32, BUFFER_FORMAT_COUNT> s_buffer_bytes_per_texel = {{
+    1,  // BUFFER_FORMAT_R8_UINT
+    2,  // BUFFER_FORMAT_R16_UINT
+    8,  // BUFFER_FORMAT_R32G32_UINT
+}};
+
+const DecodingShaderInfo* GetDecodingShaderInfo(u32 format)
+{
+  auto iter = s_decoding_shader_info.find(static_cast<TextureFormat>(format));
+  return iter != s_decoding_shader_info.end() ? &iter->second : nullptr;
+}
+
+u32 GetBytesPerBufferElement(BufferFormat buffer_format)
+{
+  return s_buffer_bytes_per_texel[buffer_format];
+}
+
+std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height)
+{
+  // Flatten to a single dimension?
+  if (info->group_flatten)
+    return {(width * height + (info->group_size_x - 1)) / info->group_size_x, 1};
+
+  return {(width + (info->group_size_x - 1)) / info->group_size_x,
+          (height + (info->group_size_y - 1)) / info->group_size_y};
+}
+
+std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type)
+{
+  const DecodingShaderInfo* info = GetDecodingShaderInfo(format);
+  if (!info)
+    return "";
+
+  std::stringstream ss;
+  switch (palette_format)
+  {
+  case GX_TL_IA8:
+    ss << "#define PALETTE_FORMAT_IA8 1\n";
+    break;
+  case GX_TL_RGB565:
+    ss << "#define PALETTE_FORMAT_RGB565 1\n";
+    break;
+  case GX_TL_RGB5A3:
+    ss << "#define PALETTE_FORMAT_RGB5A3 1\n";
+    break;
+  }
+
+  ss << decoding_shader_header;
+  ss << info->shader_body;
+
+  return ss.str();
+}
+
 }  // namespace
--- a/Source/Core/VideoCommon/TextureConversionShader.h
+++ b/Source/Core/VideoCommon/TextureConversionShader.h
@ -4,6 +4,9 @@

 #pragma once

+#include <string>
+#include <utility>
+
 #include "Common/CommonTypes.h"

 enum class APIType;
@ -13,4 +16,40 @@ namespace TextureConversionShader
 u16 GetEncodedSampleCount(u32 format);

 const char* GenerateEncodingShader(u32 format, APIType ApiType);
-}
+
+// View format of the input data to the texture decoding shader.
+enum BufferFormat
+{
+  BUFFER_FORMAT_R8_UINT,
+  BUFFER_FORMAT_R16_UINT,
+  BUFFER_FORMAT_R32G32_UINT,
+  BUFFER_FORMAT_COUNT
+};
+
+// Information required to compile and dispatch a texture decoding shader.
+struct DecodingShaderInfo
+{
+  BufferFormat buffer_format;
+  u32 palette_size;
+  u32 group_size_x;
+  u32 group_size_y;
+  bool group_flatten;
+  const char* shader_body;
+};
+
+// Obtain shader information for the specified texture format.
+// If this format does not have a shader written for it, returns nullptr.
+const DecodingShaderInfo* GetDecodingShaderInfo(u32 format);
+
+// Determine how many bytes there are in each element of the texel buffer.
+// Needed for alignment and stride calculations.
+u32 GetBytesPerBufferElement(BufferFormat buffer_format);
+
+// Determine how many thread groups should be dispatched for an image of the specified width/height.
+// First is the number of X groups, second is the number of Y groups, Z is always one.
+std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height);
+
+// Returns the GLSL string containing the texture decoding shader for the specified format.
+std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type);
+
+}  // namespace TextureConversionShader