mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-15 22:09:19 -07:00
a2702c6e27
To further increase the accuracy of the post process phase, I've added (scRGB) HDR support, which is necessary to fully display the PAL and NTSC-J color spaces, and also to improve the quality of post process texture samplings and do them in linear space instead of gamma space (which is very important when playing at low resolutions). For SDR, the quality is also slightly increased, at least if any post process runs, as the buffer is now R10G10B10A2 (on Vulkan, DX11 and DX12) if supported; previously it was R8G8B8A8 but the alpha bits were wasted. Gamma correction is arguably the most important thing, as Dolphin on Windows output in "sRGB" (implicitly) because that's what Windows expects by default, though sRGB gamma is very different from the gamma commonly used by video standards dating to the pre-HDR era (roughly gamma 2.35). Additionally, the addition of HDR support (which is pretty straightforward and minimal) added support for our own custom AutoHDR shaders, which would allow us to achieve decent-looking HDR in Dolphin games without having to use SpecialK or Windows 11 AutoHDR, both of which don't necessarily play nice with older games with strongly different and simpler lighting. HDR should also be supported on Linux. Development of my own AutoHDR shader is almost complete and will come next. This has been carefully tested and there should be no regression in any of the different features that Dolphin offers, like multisampling, stereo rendering, other post processes, etc. Fixes: https://bugs.dolphin-emu.org/issues/8941 Co-authored-by: EndlesslyFlowering <EndlesslyFlowering@protonmail.com> Co-authored-by: Dogway <lin_ares@hotmail.com>
695 lines
21 KiB
C++
695 lines
21 KiB
C++
// Copyright 2019 Dolphin Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include "VideoCommon/FramebufferShaderGen.h"
|
|
|
|
#include <string_view>
|
|
|
|
#include "Common/Logging/Log.h"
|
|
|
|
#include "VideoCommon/FramebufferManager.h"
|
|
#include "VideoCommon/ShaderGenCommon.h"
|
|
#include "VideoCommon/TextureDecoder.h"
|
|
#include "VideoCommon/VertexShaderGen.h"
|
|
#include "VideoCommon/VideoCommon.h"
|
|
#include "VideoCommon/VideoConfig.h"
|
|
|
|
namespace FramebufferShaderGen
|
|
{
|
|
namespace
|
|
{
|
|
// Returns the api_type field of the active configuration's backend info.
APIType GetAPIType()
{
  const auto& backend_info = g_ActiveConfig.backend_info;
  return backend_info.api_type;
}
|
|
|
|
// Writes only the header line of the std140 uniform block named PSBlock (binding 1).
// The braces and the members of the block are written by the caller afterwards.
void EmitUniformBufferDeclaration(ShaderCode& code)
{
  code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n");
}
|
|
|
|
// Declares one array sampler per index in [start, end), named samp<index> and bound to the
// slot with the same index. When multisampled is set the sampler type is sampler2DMSArray,
// otherwise sampler2DArray. Nothing is written for API types other than D3D, Metal, OpenGL
// and Vulkan.
void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
                             bool multisampled = false)
{
  const APIType api_type = GetAPIType();
  const bool known_api = api_type == APIType::D3D || api_type == APIType::Metal ||
                         api_type == APIType::OpenGL || api_type == APIType::Vulkan;
  if (!known_api)
    return;

  const std::string_view sampler_type = multisampled ? "sampler2DMSArray" : "sampler2DArray";
  for (u32 index = start; index < end; ++index)
    code.Write("SAMPLER_BINDING({}) uniform {} samp{};\n", index, sampler_type, index);
}
|
|
|
|
// Writes a filtered sample expression, texture(samp<n>, <coords>), without a trailing
// semicolon so that it can be embedded in a larger statement. Nothing is written for API
// types other than D3D, Metal, OpenGL and Vulkan.
void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords)
{
  const APIType api_type = GetAPIType();
  if (api_type != APIType::D3D && api_type != APIType::Metal && api_type != APIType::OpenGL &&
      api_type != APIType::Vulkan)
  {
    return;
  }

  code.Write("texture(samp{}, {})", n, coords);
}
|
|
|
|
// Emits a texel fetch/load instruction. Assumes that "coords" is a 4-element vector, with z
|
|
// containing the layer, and w containing the mipmap level.
|
|
void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords)
|
|
{
|
|
switch (GetAPIType())
|
|
{
|
|
case APIType::D3D:
|
|
case APIType::Metal:
|
|
case APIType::OpenGL:
|
|
case APIType::Vulkan:
|
|
code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Writes everything a vertex shader needs in front of the body of main():
//  - one "rawtex<i>" float3 attribute per texcoord input and one "rawcolor<i>" float4
//    attribute per colour input, plus "rawpos" when position_input is set;
//  - the outputs "v_tex<i>" (float3) and "v_col<i>" (float4), wrapped in a VertexData
//    interface block when the backend reports geometry shader support, or declared as
//    individually located varyings otherwise;
//  - the "opos" alias for gl_Position, the caller-supplied extra_inputs text and the
//    "void main()" line. The caller writes the braces and the body.
// Nothing is written for API types other than D3D, Metal, OpenGL and Vulkan.
void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_color_inputs,
                               bool position_input, u32 num_tex_outputs, u32 num_color_outputs,
                               std::string_view extra_inputs = {})
{
  switch (GetAPIType())
  {
  case APIType::D3D:
  case APIType::Metal:
  case APIType::OpenGL:
  case APIType::Vulkan:
  {
    for (u32 i = 0; i < num_tex_inputs; i++)
    {
      const auto attribute = ShaderAttrib::TexCoord0 + i;
      code.Write("ATTRIBUTE_LOCATION({:s}) in float3 rawtex{};\n", attribute, i);
    }
    for (u32 i = 0; i < num_color_inputs; i++)
    {
      const auto attribute = ShaderAttrib::Color0 + i;
      code.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawcolor{};\n", attribute, i);
    }
    if (position_input)
      code.Write("ATTRIBUTE_LOCATION({:s}) in float4 rawpos;\n", ShaderAttrib::Position);

    if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
    {
      code.Write("VARYING_LOCATION(0) out VertexData {{\n");
      for (u32 i = 0; i < num_tex_outputs; i++)
        code.Write(" float3 v_tex{};\n", i);
      for (u32 i = 0; i < num_color_outputs; i++)
        code.Write(" float4 v_col{};\n", i);
      code.Write("}};\n");
    }
    else
    {
      for (u32 i = 0; i < num_tex_outputs; i++)
        code.Write("VARYING_LOCATION({}) out float3 v_tex{};\n", i, i);

      // Colour outputs are placed directly after the texcoord *outputs*. The offset must be
      // the output count (not the attribute input count): the texcoord outputs occupy
      // locations [0, num_tex_outputs), and the pixel shader declaration places its colour
      // inputs after its own texcoord inputs, which correspond to these outputs.
      for (u32 i = 0; i < num_color_outputs; i++)
        code.Write("VARYING_LOCATION({}) out float4 v_col{};\n", num_tex_outputs + i, i);
    }
    code.Write("#define opos gl_Position\n");
    code.Write("{}\n", extra_inputs);
    code.Write("void main()\n");
  }
  break;
  default:
    break;
  }
}
|
|
|
|
// Writes everything a pixel shader needs in front of the body of main(): the "v_tex<i>"
// (float3) and "v_col<i>" (float4) inputs, the "ocol0" output of type output_type, the
// caller-supplied extra_vars text, optionally a "frag_coord" alias for gl_FragCoord, and
// finally the "void main()" line. The inputs are wrapped in a VertexData interface block
// when the backend reports geometry shader support; otherwise each one is an individually
// located varying, with the colours placed after the texcoords. Nothing is written for API
// types other than D3D, Metal, OpenGL and Vulkan.
void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_color_inputs,
                              std::string_view output_type = "float4",
                              std::string_view extra_vars = {}, bool emit_frag_coord = false)
{
  const APIType api_type = GetAPIType();
  if (api_type != APIType::D3D && api_type != APIType::Metal && api_type != APIType::OpenGL &&
      api_type != APIType::Vulkan)
  {
    return;
  }

  const bool use_interface_block = g_ActiveConfig.backend_info.bSupportsGeometryShaders;

  if (use_interface_block)
    code.Write("VARYING_LOCATION(0) in VertexData {{\n");

  for (u32 tex = 0; tex < num_tex_inputs; ++tex)
  {
    if (use_interface_block)
      code.Write(" float3 v_tex{};\n", tex);
    else
      code.Write("VARYING_LOCATION({}) in float3 v_tex{};\n", tex, tex);
  }

  for (u32 col = 0; col < num_color_inputs; ++col)
  {
    if (use_interface_block)
      code.Write(" float4 v_col{};\n", col);
    else
      code.Write("VARYING_LOCATION({}) in float4 v_col{};\n", num_tex_inputs + col, col);
  }

  if (use_interface_block)
    code.Write("}};\n");

  code.Write("FRAGMENT_OUTPUT_LOCATION(0) out {} ocol0;\n", output_type);
  code.Write("{}\n", extra_vars);
  if (emit_frag_coord)
    code.Write("#define frag_coord gl_FragCoord\n");
  code.Write("void main()\n");
}
|
|
} // Anonymous namespace
|
|
|
|
std::string GenerateScreenQuadVertexShader()
|
|
{
|
|
ShaderCode code;
|
|
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
|
|
|
|
"#define id gl_VertexID\n");
|
|
code.Write(
|
|
"{{\n"
|
|
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
|
|
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
|
|
|
|
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
|
|
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
|
|
code.Write(" opos.y = -opos.y;\n");
|
|
|
|
code.Write("}}\n");
|
|
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
|
|
{
|
|
ShaderCode code;
|
|
if (GetAPIType() == APIType::D3D)
|
|
{
|
|
code.Write("struct VS_OUTPUT\n"
|
|
"{{\n");
|
|
for (u32 i = 0; i < num_tex; i++)
|
|
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
|
|
for (u32 i = 0; i < num_colors; i++)
|
|
code.Write(" float4 color{} : TEXCOORD{};\n", i, i + num_tex);
|
|
code.Write(" float4 position : SV_Position;\n"
|
|
"}};\n");
|
|
|
|
code.Write("struct GS_OUTPUT\n"
|
|
"{{");
|
|
for (u32 i = 0; i < num_tex; i++)
|
|
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
|
|
for (u32 i = 0; i < num_colors; i++)
|
|
code.Write(" float4 color{} : TEXCOORD{};\n", i, i + num_tex);
|
|
code.Write(" float4 position : SV_Position;\n"
|
|
" uint slice : SV_RenderTargetArrayIndex;\n"
|
|
"}};\n\n");
|
|
|
|
code.Write("[maxvertexcount(6)]\n"
|
|
"void main(triangle VS_OUTPUT vso[3], inout TriangleStream<GS_OUTPUT> output)\n"
|
|
"{{\n"
|
|
" for (uint slice = 0; slice < 2u; slice++)\n"
|
|
" {{\n"
|
|
" for (int i = 0; i < 3; i++)\n"
|
|
" {{\n"
|
|
" GS_OUTPUT gso;\n"
|
|
" gso.position = vso[i].position;\n");
|
|
for (u32 i = 0; i < num_tex; i++)
|
|
code.Write(" gso.tex{} = float3(vso[i].tex{}.xy, float(slice));\n", i, i);
|
|
for (u32 i = 0; i < num_colors; i++)
|
|
code.Write(" gso.color{} = vso[i].color{};\n", i, i);
|
|
code.Write(" gso.slice = slice;\n"
|
|
" output.Append(gso);\n"
|
|
" }}\n"
|
|
" output.RestartStrip();\n"
|
|
" }}\n"
|
|
"}}\n");
|
|
}
|
|
else if (GetAPIType() == APIType::OpenGL || GetAPIType() == APIType::Vulkan)
|
|
{
|
|
code.Write("layout(triangles) in;\n"
|
|
"layout(triangle_strip, max_vertices = 6) out;\n");
|
|
|
|
if (num_tex > 0 || num_colors > 0)
|
|
{
|
|
code.Write("VARYING_LOCATION(0) in VertexData {{\n");
|
|
for (u32 i = 0; i < num_tex; i++)
|
|
code.Write(" float3 v_tex{};\n", i);
|
|
for (u32 i = 0; i < num_colors; i++)
|
|
code.Write(" float4 v_col{};\n", i);
|
|
code.Write("}} v_in[];\n");
|
|
|
|
code.Write("VARYING_LOCATION(0) out VertexData {{\n");
|
|
for (u32 i = 0; i < num_tex; i++)
|
|
code.Write(" float3 v_tex{};\n", i);
|
|
for (u32 i = 0; i < num_colors; i++)
|
|
code.Write(" float4 v_col{};\n", i);
|
|
code.Write("}} v_out;\n");
|
|
}
|
|
code.Write("\n"
|
|
"void main()\n"
|
|
"{{\n"
|
|
" for (int j = 0; j < 2; j++)\n"
|
|
" {{\n"
|
|
" gl_Layer = j;\n");
|
|
|
|
// We have to explicitly unroll this loop otherwise the GL compiler gets cranky.
|
|
for (u32 v = 0; v < 3; v++)
|
|
{
|
|
code.Write(" gl_Position = gl_in[{}].gl_Position;\n", v);
|
|
for (u32 i = 0; i < num_tex; i++)
|
|
{
|
|
code.Write(" v_out.v_tex{} = float3(v_in[{}].v_tex{}.xy, float(j));\n", i, v, i);
|
|
}
|
|
for (u32 i = 0; i < num_colors; i++)
|
|
code.Write(" v_out.v_col{} = v_in[{}].v_col{};\n", i, v, i);
|
|
code.Write(" EmitVertex();\n\n");
|
|
}
|
|
code.Write(" EndPrimitive();\n"
|
|
" }}\n"
|
|
"}}\n");
|
|
}
|
|
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateTextureCopyVertexShader()
|
|
{
|
|
ShaderCode code;
|
|
EmitUniformBufferDeclaration(code);
|
|
code.Write("{{"
|
|
" float2 src_offset;\n"
|
|
" float2 src_size;\n"
|
|
"}};\n\n");
|
|
|
|
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
|
|
|
|
"#define id gl_VertexID");
|
|
code.Write("{{\n"
|
|
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
|
|
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"
|
|
" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
|
|
|
|
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
|
|
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
|
|
code.Write(" opos.y = -opos.y;\n");
|
|
|
|
code.Write("}}\n");
|
|
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateTextureCopyPixelShader()
|
|
{
|
|
ShaderCode code;
|
|
EmitSamplerDeclarations(code, 0, 1, false);
|
|
EmitPixelMainDeclaration(code, 1, 0);
|
|
code.Write("{{\n"
|
|
" ocol0 = ");
|
|
EmitSampleTexture(code, 0, "v_tex0");
|
|
code.Write(";\n"
|
|
"}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateColorPixelShader()
|
|
{
|
|
ShaderCode code;
|
|
EmitPixelMainDeclaration(code, 0, 1);
|
|
code.Write("{{\n"
|
|
" ocol0 = v_col0;\n"
|
|
"}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateResolveColorPixelShader(u32 samples)
|
|
{
|
|
ShaderCode code;
|
|
EmitSamplerDeclarations(code, 0, 1, true);
|
|
EmitPixelMainDeclaration(code, 1, 0);
|
|
code.Write("{{\n"
|
|
" int layer = int(v_tex0.z);\n"
|
|
" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"
|
|
" ocol0 = float4(0.0f);\n");
|
|
code.Write(" for (int i = 0; i < {}; i++)\n", samples);
|
|
code.Write(" ocol0 += texelFetch(samp0, coords, i);\n");
|
|
code.Write(" ocol0 /= {}.0f;\n", samples);
|
|
code.Write("}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateResolveDepthPixelShader(u32 samples)
|
|
{
|
|
ShaderCode code;
|
|
EmitSamplerDeclarations(code, 0, 1, true);
|
|
EmitPixelMainDeclaration(code, 1, 0, "float", "");
|
|
code.Write("{{\n"
|
|
" int layer = int(v_tex0.z);\n");
|
|
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
|
|
|
|
// Take the minimum of all depth samples.
|
|
code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n");
|
|
code.Write(" for (int i = 1; i < {}; i++)\n", samples);
|
|
code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");
|
|
|
|
code.Write("}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateClearVertexShader()
|
|
{
|
|
ShaderCode code;
|
|
EmitUniformBufferDeclaration(code);
|
|
code.Write("{{\n"
|
|
" float4 clear_color;\n"
|
|
" float clear_depth;\n"
|
|
"}};\n");
|
|
|
|
EmitVertexMainDeclaration(code, 0, 0, false, 0, 1,
|
|
|
|
"#define id gl_VertexID\n");
|
|
code.Write(
|
|
"{{\n"
|
|
" float2 coord = float2(float((id << 1) & 2), float(id & 2));\n"
|
|
" opos = float4(coord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), clear_depth, 1.0f);\n"
|
|
" v_col0 = clear_color;\n");
|
|
|
|
// NDC space is flipped in Vulkan
|
|
if (GetAPIType() == APIType::Vulkan)
|
|
code.Write(" opos.y = -opos.y;\n");
|
|
|
|
code.Write("}}\n");
|
|
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateEFBPokeVertexShader()
|
|
{
|
|
ShaderCode code;
|
|
EmitVertexMainDeclaration(code, 0, 1, true, 0, 1);
|
|
code.Write("{{\n"
|
|
" v_col0 = rawcolor0;\n"
|
|
" opos = float4(rawpos.xyz, 1.0f);\n");
|
|
if (g_ActiveConfig.backend_info.bSupportsLargePoints)
|
|
code.Write(" gl_PointSize = rawpos.w;\n");
|
|
|
|
// NDC space is flipped in Vulkan.
|
|
if (GetAPIType() == APIType::Vulkan)
|
|
code.Write(" opos.y = -opos.y;\n");
|
|
|
|
code.Write("}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples)
|
|
{
|
|
ShaderCode code;
|
|
EmitSamplerDeclarations(code, 0, 1, samples > 1);
|
|
EmitPixelMainDeclaration(code, 1, 0, "float4",
|
|
|
|
"");
|
|
code.Write("{{\n"
|
|
" int layer = int(v_tex0.z);\n");
|
|
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
|
|
|
|
if (samples == 1)
|
|
{
|
|
// No MSAA at all.
|
|
code.Write(" float4 val = texelFetch(samp0, coords, 0);\n");
|
|
}
|
|
else if (g_ActiveConfig.bSSAA)
|
|
{
|
|
// Sample shading, shader runs once per sample
|
|
code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);");
|
|
}
|
|
else
|
|
{
|
|
// MSAA without sample shading, average out all samples.
|
|
code.Write(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
|
|
code.Write(" for (int i = 0; i < {}; i++)\n", samples);
|
|
code.Write(" val += texelFetch(samp0, coords, i);\n");
|
|
code.Write(" val /= float({});\n", samples);
|
|
}
|
|
|
|
switch (convtype)
|
|
{
|
|
case EFBReinterpretType::RGB8ToRGBA6:
|
|
code.Write(" int4 src8 = int4(round(val * 255.f));\n"
|
|
" int4 dst6;\n"
|
|
" dst6.r = src8.r >> 2;\n"
|
|
" dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
|
|
" dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
|
|
" dst6.a = src8.b & 0x3F;\n"
|
|
" ocol0 = float4(dst6) / 63.f;\n");
|
|
break;
|
|
|
|
case EFBReinterpretType::RGB8ToRGB565:
|
|
code.Write(" ocol0 = val;\n");
|
|
break;
|
|
|
|
case EFBReinterpretType::RGBA6ToRGB8:
|
|
code.Write(" int4 src6 = int4(round(val * 63.f));\n"
|
|
" int4 dst8;\n"
|
|
" dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
|
|
" dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
|
|
" dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
|
|
" dst8.a = 255;\n"
|
|
" ocol0 = float4(dst8) / 255.f;\n");
|
|
break;
|
|
|
|
case EFBReinterpretType::RGBA6ToRGB565:
|
|
code.Write(" ocol0 = val;\n");
|
|
break;
|
|
|
|
case EFBReinterpretType::RGB565ToRGB8:
|
|
code.Write(" ocol0 = val;\n");
|
|
break;
|
|
|
|
case EFBReinterpretType::RGB565ToRGBA6:
|
|
//
|
|
code.Write(" ocol0 = val;\n");
|
|
break;
|
|
}
|
|
|
|
code.Write("}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format)
|
|
{
|
|
ShaderCode code;
|
|
EmitSamplerDeclarations(code, 0, 1, false);
|
|
EmitPixelMainDeclaration(code, 1, 0, "float4", "", true);
|
|
code.Write("{{\n"
|
|
" int layer = int(v_tex0.z);\n"
|
|
" int4 coords = int4(int2(frag_coord.xy), layer, 0);\n");
|
|
|
|
// Convert to a 32-bit value encompassing all channels, filling the most significant bits with
|
|
// zeroes.
|
|
code.Write(" uint raw_value;\n");
|
|
switch (from_format)
|
|
{
|
|
case TextureFormat::I8:
|
|
case TextureFormat::C8:
|
|
{
|
|
code.Write(" float4 temp_value = ");
|
|
EmitTextureLoad(code, 0, "coords");
|
|
code.Write(";\n"
|
|
" raw_value = uint(temp_value.r * 255.0);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::IA8:
|
|
{
|
|
code.Write(" float4 temp_value = ");
|
|
EmitTextureLoad(code, 0, "coords");
|
|
code.Write(";\n"
|
|
" raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::I4:
|
|
{
|
|
code.Write(" float4 temp_value = ");
|
|
EmitTextureLoad(code, 0, "coords");
|
|
code.Write(";\n"
|
|
" raw_value = uint(temp_value.r * 15.0);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::IA4:
|
|
{
|
|
code.Write(" float4 temp_value = ");
|
|
EmitTextureLoad(code, 0, "coords");
|
|
code.Write(";\n"
|
|
" raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::RGB565:
|
|
{
|
|
code.Write(" float4 temp_value = ");
|
|
EmitTextureLoad(code, 0, "coords");
|
|
code.Write(";\n"
|
|
" raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n"
|
|
" (uint(temp_value.r * 31.0) << 11);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::RGB5A3:
|
|
{
|
|
code.Write(" float4 temp_value = ");
|
|
EmitTextureLoad(code, 0, "coords");
|
|
code.Write(";\n");
|
|
|
|
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
|
|
code.Write(
|
|
" if (temp_value.a > 0.878f) {{\n"
|
|
" raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n"
|
|
" (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n"
|
|
" }} else {{\n"
|
|
" raw_value = (uint(temp_value.b * 15.0)) | (uint(temp_value.g * 15.0) << 4) |\n"
|
|
" (uint(temp_value.r * 15.0) << 8) | (uint(temp_value.a * 7.0) << 12);\n"
|
|
" }}\n");
|
|
}
|
|
break;
|
|
|
|
default:
|
|
WARN_LOG_FMT(VIDEO, "From format {} is not supported", from_format);
|
|
return "{}\n";
|
|
}
|
|
|
|
// Now convert it to its new representation.
|
|
switch (to_format)
|
|
{
|
|
case TextureFormat::I8:
|
|
case TextureFormat::C8:
|
|
{
|
|
code.Write(" float orgba = float(raw_value & 0xFFu) / 255.0;\n"
|
|
" ocol0 = float4(orgba, orgba, orgba, orgba);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::IA8:
|
|
{
|
|
code.Write(" float orgb = float(raw_value & 0xFFu) / 255.0;\n"
|
|
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 8) & 0xFFu) / 255.0);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::IA4:
|
|
{
|
|
code.Write(" float orgb = float(raw_value & 0xFu) / 15.0;\n"
|
|
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 4) & 0xFu) / 15.0);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::RGB565:
|
|
{
|
|
code.Write(" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
|
|
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
|
|
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n");
|
|
}
|
|
break;
|
|
|
|
case TextureFormat::RGB5A3:
|
|
{
|
|
code.Write(" if ((raw_value & 0x8000u) != 0u) {{\n"
|
|
" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
|
|
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
|
|
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n"
|
|
" }} else {{\n"
|
|
" ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n"
|
|
" float((raw_value >> 4) & 0x0Fu) / 15.0,\n"
|
|
" float(raw_value & 0x0Fu) / 15.0,\n"
|
|
" float((raw_value >> 12) & 0x07u) / 7.0);\n"
|
|
" }}\n");
|
|
}
|
|
break;
|
|
default:
|
|
WARN_LOG_FMT(VIDEO, "To format {} is not supported", to_format);
|
|
return "{}\n";
|
|
}
|
|
|
|
code.Write("}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateEFBRestorePixelShader()
|
|
{
|
|
ShaderCode code;
|
|
EmitSamplerDeclarations(code, 0, 2, false);
|
|
EmitPixelMainDeclaration(code, 1, 0, "float4", "");
|
|
code.Write("{{\n"
|
|
" ocol0 = ");
|
|
EmitSampleTexture(code, 0, "v_tex0");
|
|
code.Write(";\n");
|
|
code.Write(" gl_FragDepth = ");
|
|
EmitSampleTexture(code, 1, "v_tex0");
|
|
code.Write(".r;\n"
|
|
"}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateImGuiVertexShader()
|
|
{
|
|
ShaderCode code;
|
|
|
|
// Uniform buffer contains the viewport size, and we transform in the vertex shader.
|
|
EmitUniformBufferDeclaration(code);
|
|
code.Write("{{\n"
|
|
"float2 u_rcp_viewport_size_mul2;\n"
|
|
"}};\n\n");
|
|
|
|
EmitVertexMainDeclaration(code, 1, 1, true, 1, 1);
|
|
code.Write("{{\n"
|
|
" v_tex0 = float3(rawtex0.xy, 0.0);\n"
|
|
" v_col0 = rawcolor0;\n"
|
|
" opos = float4(rawpos.x * u_rcp_viewport_size_mul2.x - 1.0,"
|
|
" 1.0 - rawpos.y * u_rcp_viewport_size_mul2.y, 0.0, 1.0);\n");
|
|
|
|
// NDC space is flipped in Vulkan.
|
|
if (GetAPIType() == APIType::Vulkan)
|
|
code.Write(" opos.y = -opos.y;\n");
|
|
|
|
code.Write("}}\n");
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
std::string GenerateImGuiPixelShader(bool linear_space_output)
|
|
{
|
|
ShaderCode code;
|
|
EmitSamplerDeclarations(code, 0, 1, false);
|
|
EmitPixelMainDeclaration(code, 1, 1);
|
|
code.Write("{{\n"
|
|
" ocol0 = ");
|
|
EmitSampleTexture(code, 0, "float3(v_tex0.xy, 0.0)");
|
|
// We approximate to gamma 2.2 instead of sRGB as it barely matters for this case.
|
|
// Note that if HDR is enabled, ideally we should multiply by
|
|
// the paper white brightness for readability.
|
|
if (linear_space_output)
|
|
code.Write(" * pow(v_col0, float4(2.2f, 2.2f, 2.2f, 1.0f));\n}}\n");
|
|
else
|
|
code.Write(" * v_col0;\n}}\n");
|
|
|
|
return code.GetBuffer();
|
|
}
|
|
|
|
} // namespace FramebufferShaderGen
|