VideoBackends: Add Metal renderer

This commit is contained in:
TellowKrinkle
2022-06-01 04:58:13 -05:00
parent b0b5faa793
commit 716c0980d7
42 changed files with 3714 additions and 47 deletions

View File

@ -34,6 +34,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -55,6 +56,7 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texture(samp{}, {})", n, coords);
@ -72,6 +74,7 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
@ -89,6 +92,7 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -138,6 +142,7 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{

View File

@ -113,8 +113,8 @@ public:
// Indexed draw entry point; this default implementation has an empty body and does nothing.
// NOTE(review): base_index/num_indices/base_vertex presumably select the index range and the
// vertex offset for the draw — confirm against a backend override.
virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {}
// Dispatching compute shaders with currently-bound state.
// The body below is empty, so this default does nothing when called.
// NOTE(review): groupsize_x/y/z presumably give the size of one thread group and groups_x/y/z
// the number of groups to launch along each axis — confirm against a backend override.
virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
virtual void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
}

View File

@ -60,6 +60,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
case APIType::D3D:
filename += "D3D";
break;
case APIType::Metal:
filename += "Metal";
break;
case APIType::OpenGL:
filename += "OpenGL";
break;

View File

@ -55,7 +55,7 @@ CompileShaderToSPV(EShLanguage stage, APIType api_type,
glslang::TShader::ForbidIncluder includer;
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules);
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
messages = static_cast<EShMessages>(messages | EShMsgVulkanRules);
int default_version = 450;

View File

@ -2924,7 +2924,8 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con
auto dispatch_groups =
TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height);
g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1);
g_renderer->DispatchComputeShader(shader, info->group_size_x, info->group_size_y, 1,
dispatch_groups.first, dispatch_groups.second, 1);
// Copy from decoding texture -> final texture
// This is because we don't want to have to create compute view for every layer

View File

@ -519,10 +519,49 @@ UBO_BINDING(std140, 1) uniform UBO {
uint u_palette_offset;
};
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
#ifdef HAS_PALETTE
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
#if defined(API_METAL)
#if defined(TEXEL_BUFFER_FORMAT_R8)
SSBO_BINDING(0) readonly buffer Input { uint8_t s_input_buffer[]; };
#define FETCH(offset) uint(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_R16)
SSBO_BINDING(0) readonly buffer Input { uint16_t s_input_buffer[]; };
#define FETCH(offset) uint(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_RGBA8)
SSBO_BINDING(0) readonly buffer Input { u8vec4 s_input_buffer[]; };
#define FETCH(offset) uvec4(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_R32G32)
SSBO_BINDING(0) readonly buffer Input { uvec2 s_input_buffer[]; };
#define FETCH(offset) s_input_buffer[offset]
#else
#error No texel buffer?
#endif
#ifdef HAS_PALETTE
SSBO_BINDING(1) readonly buffer Palette { uint16_t s_palette_buffer[]; };
#define FETCH_PALETTE(offset) uint(s_palette_buffer[offset])
#endif
#else
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
#if defined(TEXEL_BUFFER_FORMAT_R8) || defined(TEXEL_BUFFER_FORMAT_R16)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).r
#elif defined(TEXEL_BUFFER_FORMAT_RGBA8)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset))
#elif defined(TEXEL_BUFFER_FORMAT_R32G32)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).rg
#else
#error No texel buffer?
#endif
#ifdef HAS_PALETTE
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
#define FETCH_PALETTE(offset) texelFetch(s_palette_buffer, int((offset) + u_palette_offset)).r
#endif
#endif // defined(API_METAL)
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
#define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier();
@ -563,7 +602,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords)
{
uint2 block = coords / block_size;
uint2 offset = coords % block_size;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * (block_size.x * block_size.y);
buffer_pos += offset.y * block_size.x;
@ -575,7 +614,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords)
uint4 GetPaletteColor(uint index)
{
// Fetch and swap BE to LE.
uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
uint val = Swap16(FETCH_PALETTE(index));
uint4 color;
#if defined(PALETTE_FORMAT_IA8)
@ -633,14 +672,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint i;
if ((coords.x & 1u) == 0u)
i = Convert4To8((val >> 4));
@ -663,7 +702,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint i = Convert4To8((val & 0x0Fu));
uint a = Convert4To8((val >> 4));
uint4 color = uint4(i, i, i, a);
@ -681,7 +720,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i = FETCH(buffer_pos);
uint4 color = uint4(i, i, i, i);
float4 norm_color = float4(color) / 255.0;
@ -697,7 +736,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint a = (val & 0xFFu);
uint i = (val >> 8);
uint4 color = uint4(i, i, i, a);
@ -714,7 +753,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uint val = Swap16(FETCH(buffer_pos));
uint4 color;
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
@ -736,7 +775,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uint val = Swap16(FETCH(buffer_pos));
uint4 color;
if ((val & 0x8000u) != 0u)
@ -771,7 +810,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// for the entire block, then the GB channels afterwards.
uint2 block = coords.xy / 4u;
uint2 offset = coords.xy % 4u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
// Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes.
buffer_pos += block.y * u_src_row_stride;
@ -780,8 +819,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
buffer_pos += offset.x;
// The two GB channels follow after the block's AR channels.
uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;
uint val1 = FETCH(buffer_pos + 0u);
uint val2 = FETCH(buffer_pos + 16u);
uint4 color;
color.a = (val1 & 0xFFu);
@ -835,14 +874,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Calculate tiled block coordinates.
uint2 tile_block_coords = block_coords / 2u;
uint2 subtile_block_coords = block_coords % 2u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += tile_block_coords.y * u_src_row_stride;
buffer_pos += tile_block_coords.x * 4u;
buffer_pos += subtile_block_coords.y * 2u;
buffer_pos += subtile_block_coords.x;
// Read the entire DXT block to shared memory.
uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
uint2 raw_data = FETCH(buffer_pos);
shared_temp[block_in_group] = raw_data;
}
@ -921,14 +960,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
@ -945,7 +984,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint index = FETCH(buffer_pos);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
@ -960,7 +999,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
uint index = Swap16(FETCH(buffer_pos)) & 0x3FFFu;
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
@ -976,8 +1015,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
DEFINE_MAIN(8, 8)
{
uint2 uv = gl_GlobalInvocationID.xy;
int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u));
float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos));
uint buffer_pos = (uv.y * u_src_row_stride) + (uv.x / 2u);
float4 yuyv = float4(FETCH(buffer_pos));
float y = (uv.x & 1u) != 0u ? yuyv.b : yuyv.r;
@ -1034,6 +1073,25 @@ std::string GenerateDecodingShader(TextureFormat format, std::optional<TLUTForma
}
}
switch (info->buffer_format)
{
case TEXEL_BUFFER_FORMAT_R8_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R8 1\n";
break;
case TEXEL_BUFFER_FORMAT_R16_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R16 1\n";
break;
case TEXEL_BUFFER_FORMAT_RGBA8_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_RGBA8 1\n";
break;
case TEXEL_BUFFER_FORMAT_R32G32_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R32G32 1\n";
break;
case NUM_TEXEL_BUFFER_FORMATS:
ASSERT(0);
break;
}
ss << decoding_shader_header;
ss << info->shader_body;
@ -1121,7 +1179,10 @@ float4 DecodePixel(int val)
ss << "\n";
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
if (api_type == APIType::Metal)
ss << "SSBO_BINDING(0) readonly buffer Palette { uint16_t palette[]; };\n";
else
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n";
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
@ -1143,9 +1204,12 @@ float4 DecodePixel(int val)
ss << "void main() {\n";
ss << " float3 coords = v_tex0;\n";
ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n";
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
if (api_type == APIType::Metal)
ss << " src = int(palette[uint(src)]);\n";
else
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n";
ss << " src = ((src << 8) | (src >> 8)) & 0xFFFF;\n";
ss << " ocol0 = DecodePixel(src);\n";
ss << "}\n";

View File

@ -82,7 +82,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running on Metal, either
// natively or through Vulkan (MoltenVK), if we want to use it.
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
{
if (use_dual_source)
{

View File

@ -35,6 +35,9 @@
#ifdef HAS_VULKAN
#include "VideoBackends/Vulkan/VideoBackend.h"
#endif
#ifdef __APPLE__
#include "VideoBackends/Metal/VideoBackend.h"
#endif
#include "VideoCommon/AsyncRequests.h"
#include "VideoCommon/BPStructs.h"
@ -227,6 +230,7 @@ const std::vector<std::unique_ptr<VideoBackendBase>>& VideoBackendBase::GetAvail
#ifdef __APPLE__
// Emplace the Vulkan backend at the beginning so it takes precedence over OpenGL.
backends.emplace(backends.begin(), std::make_unique<Vulkan::VideoBackend>());
backends.push_back(std::make_unique<Metal::VideoBackend>());
#else
backends.push_back(std::make_unique<Vulkan::VideoBackend>());
#endif

View File

@ -39,6 +39,7 @@ enum class APIType
OpenGL,
D3D,
Vulkan,
Metal,
Nothing
};