dolphin/Source/Core/VideoCommon/ShaderGenCommon.cpp
Pokechu22 f3df3a7727 PixelShaderGen: Clamp texture layer when using manual texture sampling with stereoscopic 3D
Otherwise, texelFetch() will use an out-of-bounds layer for game textures (that have 1 layer; EFB copies have 2 layers in stereoscopic 3D mode), which is undefined behavior (often resulting in a black image). The fast texture sampling path uses texture(), which always clamps (see https://www.khronos.org/opengl/wiki/Array_Texture#Access_in_shaders), so it was unaffected by this difference.
2022-12-27 13:45:13 -08:00

351 lines
14 KiB
C++

// Copyright 2017 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoCommon/ShaderGenCommon.h"
#include <fmt/format.h>
#include "Common/Assert.h"
#include "Common/FileUtil.h"
#include "Core/ConfigManager.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
ShaderHostConfig ShaderHostConfig::GetCurrent()
{
ShaderHostConfig bits = {};
bits.msaa = g_ActiveConfig.iMultisamples > 1;
bits.ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA &&
g_ActiveConfig.backend_info.bSupportsSSAA;
bits.stereo = g_ActiveConfig.stereo_mode != StereoMode::Off;
bits.wireframe = g_ActiveConfig.bWireFrame;
bits.per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
bits.vertex_rounding = g_ActiveConfig.UseVertexRounding();
bits.fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
bits.bounding_box = g_ActiveConfig.bBBoxEnable;
bits.backend_dual_source_blend = g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
bits.backend_geometry_shaders = g_ActiveConfig.backend_info.bSupportsGeometryShaders;
bits.backend_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ;
bits.backend_bbox = g_ActiveConfig.backend_info.bSupportsBBox;
bits.backend_gs_instancing = g_ActiveConfig.backend_info.bSupportsGSInstancing;
bits.backend_clip_control = g_ActiveConfig.backend_info.bSupportsClipControl;
bits.backend_ssaa = g_ActiveConfig.backend_info.bSupportsSSAA;
bits.backend_atomics = g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics;
bits.backend_depth_clamp = g_ActiveConfig.backend_info.bSupportsDepthClamp;
bits.backend_reversed_depth_range = g_ActiveConfig.backend_info.bSupportsReversedDepthRange;
bits.backend_bitfield = g_ActiveConfig.backend_info.bSupportsBitfield;
bits.backend_dynamic_sampler_indexing =
g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing;
bits.backend_shader_framebuffer_fetch = g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
bits.backend_logic_op = g_ActiveConfig.backend_info.bSupportsLogicOp;
bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion;
bits.enable_validation_layer = g_ActiveConfig.bEnableValidationLayer;
bits.manual_texture_sampling = !g_ActiveConfig.bFastTextureSampling;
bits.manual_texture_sampling_custom_texture_sizes =
g_ActiveConfig.ManualTextureSamplingWithCustomTextureSizes();
bits.backend_sampler_lod_bias = g_ActiveConfig.backend_info.bSupportsLodBiasInSampler;
bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader;
bits.backend_vs_point_line_expand = g_ActiveConfig.UseVSForLinePointExpand();
return bits;
}
std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool include_gameid,
bool include_host_config, bool include_api)
{
if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));
std::string filename = File::GetUserPath(D_SHADERCACHE_IDX);
if (include_api)
{
switch (api_type)
{
case APIType::D3D:
filename += "D3D";
break;
case APIType::Metal:
filename += "Metal";
break;
case APIType::OpenGL:
filename += "OpenGL";
break;
case APIType::Vulkan:
filename += "Vulkan";
break;
default:
break;
}
filename += '-';
}
filename += type;
if (include_gameid)
{
filename += '-';
filename += SConfig::GetInstance().GetGameID();
}
if (include_host_config)
{
// We're using 21 bits, so 6 hex characters.
ShaderHostConfig host_config = ShaderHostConfig::GetCurrent();
filename += fmt::format("-{:06X}", host_config.bits);
}
filename += ".cache";
return filename;
}
void WriteIsNanHeader(ShaderCode& out, APIType api_type)
{
out.Write("#define dolphin_isnan(f) isnan(f)\n");
}
void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config)
{
// ==============================================
// BitfieldExtract for APIs which don't have it
// ==============================================
if (!host_config.backend_bitfield)
{
out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n"
" // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
" uint mask = uint((1 << size) - 1);\n"
" return uint(val >> off) & mask;\n"
"}}\n\n");
out.Write("int bitfieldExtract(int val, int off, int size) {{\n"
" // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
" return ((val << (32 - size - off)) >> (32 - size));\n"
"}}\n\n");
}
}
static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string_view qualifier,
std::string_view type, std::string_view name, int var_index,
ShaderStage stage, std::string_view semantic = {},
int semantic_index = -1)
{
object.Write("\t{} {} {}", qualifier, type, name);
if (var_index != -1)
object.Write("{}", var_index);
if (api_type == APIType::D3D && !semantic.empty() && stage == ShaderStage::Geometry)
{
if (semantic_index != -1)
object.Write(" : {}{}", semantic, semantic_index);
else
object.Write(" : {}", semantic);
}
object.Write(";\n");
}
void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
const ShaderHostConfig& host_config, std::string_view qualifier,
ShaderStage stage)
{
// SPIRV-Cross names all semantics as "TEXCOORD"
// Unfortunately Geometry shaders (which also uses this function)
// aren't supported. The output semantic name needs to match
// up with the input semantic name for both the next stage (pixel shader)
// and the previous stage (vertex shader), so
// we need to handle geometry in a special way...
if (api_type == APIType::D3D && stage == ShaderStage::Geometry)
{
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, stage, "TEXCOORD", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, stage, "TEXCOORD", 1);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, stage, "TEXCOORD", 2);
const unsigned int index_base = 3;
unsigned int index_offset = 0;
if (host_config.backend_geometry_shaders)
{
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, stage, "TEXCOORD",
index_base + index_offset);
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, stage, "TEXCOORD",
index_base + index_offset + 1);
index_offset += 2;
}
for (unsigned int i = 0; i < texgens; ++i)
{
DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, stage, "TEXCOORD",
index_base + index_offset + i);
}
index_offset += texgens;
if (!host_config.fast_depth_calc)
{
DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, stage, "TEXCOORD",
index_base + index_offset);
index_offset++;
}
if (host_config.per_pixel_lighting)
{
DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, stage, "TEXCOORD",
index_base + index_offset);
DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, stage, "TEXCOORD",
index_base + index_offset + 1);
index_offset += 2;
}
}
else
{
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, stage, "SV_Position");
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, stage, "COLOR", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, stage, "COLOR", 1);
if (host_config.backend_geometry_shaders)
{
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, stage,
"SV_ClipDistance", 0);
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, stage,
"SV_ClipDistance", 1);
}
for (unsigned int i = 0; i < texgens; ++i)
DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, stage, "TEXCOORD", i);
if (!host_config.fast_depth_calc)
DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, stage, "TEXCOORD",
texgens);
if (host_config.per_pixel_lighting)
{
DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, stage, "TEXCOORD",
texgens + 1);
DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, stage, "TEXCOORD",
texgens + 2);
}
}
}
void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens,
const ShaderHostConfig& host_config)
{
object.Write("\t{}.pos = {}.pos;\n", a, b);
object.Write("\t{}.colors_0 = {}.colors_0;\n", a, b);
object.Write("\t{}.colors_1 = {}.colors_1;\n", a, b);
for (unsigned int i = 0; i < texgens; ++i)
object.Write("\t{}.tex{} = {}.tex{};\n", a, i, b, i);
if (!host_config.fast_depth_calc)
object.Write("\t{}.clipPos = {}.clipPos;\n", a, b);
if (host_config.per_pixel_lighting)
{
object.Write("\t{}.Normal = {}.Normal;\n", a, b);
object.Write("\t{}.WorldPos = {}.WorldPos;\n", a, b);
}
if (host_config.backend_geometry_shaders)
{
object.Write("\t{}.clipDist0 = {}.clipDist0;\n", a, b);
object.Write("\t{}.clipDist1 = {}.clipDist1;\n", a, b);
}
}
void GenerateLineOffset(ShaderCode& object, std::string_view indent0, std::string_view indent1,
std::string_view pos_a, std::string_view pos_b, std::string_view sign)
{
// GameCube/Wii's line drawing algorithm is a little quirky. It does not
// use the correct line caps. Instead, the line caps are vertical or
// horizontal depending the slope of the line.
object.Write("{indent0}float2 offset;\n"
"{indent0}float2 to = abs({pos_a}.xy / {pos_a}.w - {pos_b}.xy / {pos_b}.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
"{indent0}if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n"
// Line is more tall. Extend geometry left and right.
// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
"{indent1}offset = float2({sign}" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
"{indent0}}} else {{\n"
// Line is more wide. Extend geometry up and down.
// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
"{indent1}offset = float2(0, {sign}-" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
"{indent0}}}\n",
fmt::arg("indent0", indent0), fmt::arg("indent1", indent1), //
fmt::arg("pos_a", pos_a), fmt::arg("pos_b", pos_b), fmt::arg("sign", sign));
}
void GenerateVSLineExpansion(ShaderCode& object, std::string_view indent, u32 texgens)
{
std::string indent1 = std::string(indent) + " ";
object.Write("{0}other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION
"[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION
"[3], other_pos));\n"
"\n"
"{0}float expand_sign = is_right ? 1.0f : -1.0f;\n",
indent);
GenerateLineOffset(object, indent, indent1, "o.pos", "other_pos", "expand_sign * ");
object.Write("\n"
"{}o.pos.xy += offset * o.pos.w;\n",
indent);
if (texgens > 0)
{
object.Write("{}if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n", indent);
object.Write("{} float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n", indent);
for (u32 i = 0; i < texgens; i++)
{
object.Write("{} if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", indent, i);
object.Write("{} o.tex{}.x += texOffset;\n", indent, i);
}
object.Write("{}}}\n", indent);
}
}
void GenerateVSPointExpansion(ShaderCode& object, std::string_view indent, u32 texgens)
{
object.Write(
"{0}float2 expand_sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? -1.0f : 1.0f);\n"
"{0}float2 offset = expand_sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n"
"{0}o.pos.xy += offset * o.pos.w;\n",
indent);
if (texgens > 0)
{
object.Write("{0}if (" I_TEXOFFSET "[3] != 0) {{\n"
"{0} float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n"
"{0} float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, "
"is_bottom ? texOffsetMagnitude : 0.0f);",
indent);
for (u32 i = 0; i < texgens; i++)
{
object.Write("{} if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", indent, i);
object.Write("{} o.tex{}.xy += texOffset;\n", indent, i);
}
object.Write("{}}}\n", indent);
}
}
const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interface_block, bool in)
{
if (!msaa)
return "";
// Without GL_ARB_shading_language_420pack support, the interpolation qualifier must be
// "centroid in" and not "centroid", even within an interface block.
if (in_glsl_interface_block && !g_ActiveConfig.backend_info.bSupportsBindingLayout)
{
if (!ssaa)
return in ? "centroid in" : "centroid out";
else
return in ? "sample in" : "sample out";
}
else
{
if (!ssaa)
return "centroid";
else
return "sample";
}
}