VideoCommon: Add dynamic vertex loader to ubershaders

This commit is contained in:
TellowKrinkle
2022-06-18 01:09:35 -05:00
parent 720b3f5519
commit 4c629c2bee
17 changed files with 253 additions and 66 deletions

View File

@ -22,7 +22,11 @@ VertexShaderUid GetVertexShaderUid()
return out;
}
static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out);
// Forward declaration; the definition appears after GenVertexShader. Appends the texture
// coordinate generation code for num_texgen texgens to `out`. `host_config` is consulted for
// backend_dynamic_vertex_loader to decide how the raw texture coordinates are read.
static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config,
u32 num_texgen, ShaderCode& out);
// Forward declaration. When host_config.backend_dynamic_vertex_loader is set, writes a shader
// line of the form
//   <shader_type> <name> = load_input_<shader_type>_<stored_type>(vertex_base_offset,
//                                                                 vertex_offset_<offset>);
// padded on the left by `indent` spaces, where <offset> is `offset_name` if non-empty and `name`
// otherwise. When the dynamic vertex loader is not in use nothing is written.
static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent,
std::string_view name, std::string_view shader_type,
std::string_view stored_type, std::string_view offset_name = {});
ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config,
const vertex_ubershader_uid_data* uid_data)
@ -50,15 +54,76 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
WriteBitfieldExtractHeader(out, api_type, host_config);
WriteLightingFunction(out);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
if (host_config.backend_dynamic_vertex_loader)
{
out.Write(R"(
SSBO_BINDING(1) readonly restrict buffer Vertices {{
uint vertex_buffer[];
}};
uint GetVertexBaseOffset() {{
return gl_VertexID * vertex_stride;
}}
uint4 load_input_uint4_ubyte4(uint vtx_offset, uint attr_offset) {{
uint value = vertex_buffer[vtx_offset + attr_offset];
return uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);
}}
float4 load_input_float4_ubyte4(uint vtx_offset, uint attr_offset) {{
return float4(load_input_uint4_ubyte4(vtx_offset, attr_offset)) / 255.0f;
}}
float3 load_input_float3_float3(uint vtx_offset, uint attr_offset) {{
uint offset = vtx_offset + attr_offset;
return float3(uintBitsToFloat(vertex_buffer[offset + 0]),
uintBitsToFloat(vertex_buffer[offset + 1]),
uintBitsToFloat(vertex_buffer[offset + 2]));
}}
float4 load_input_float4_rawpos(uint vtx_offset, uint attr_offset) {{
uint components = attr_offset >> 16;
uint offset = vtx_offset + (attr_offset & 0xffff);
if (components < 3)
return float4(uintBitsToFloat(vertex_buffer[offset + 0]),
uintBitsToFloat(vertex_buffer[offset + 1]),
0.0f, 1.0f);
else
return float4(uintBitsToFloat(vertex_buffer[offset + 0]),
uintBitsToFloat(vertex_buffer[offset + 1]),
uintBitsToFloat(vertex_buffer[offset + 2]),
1.0f);
}}
float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{
uint components = attr_offset >> 16;
uint offset = vtx_offset + (attr_offset & 0xffff);
if (components < 2)
return float3(uintBitsToFloat(vertex_buffer[offset + 0]), 0.0f, 0.0f);
else if (components < 3)
return float3(uintBitsToFloat(vertex_buffer[offset + 0]),
uintBitsToFloat(vertex_buffer[offset + 1]),
0.0f);
else
return float3(uintBitsToFloat(vertex_buffer[offset + 0]),
uintBitsToFloat(vertex_buffer[offset + 1]),
uintBitsToFloat(vertex_buffer[offset + 2]));
}}
)");
}
else
{
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
}
if (host_config.backend_geometry_shaders)
{
@ -99,7 +164,12 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
out.Write("VS_OUTPUT o;\n"
"\n");
if (host_config.backend_dynamic_vertex_loader)
{
out.Write("uint vertex_base_offset = GetVertexBaseOffset();\n");
}
// rawpos is always needed
LoadVertexAttribute(out, host_config, 0, "rawpos", "float4", "rawpos");
// Transforms
out.Write("// Position matrix\n"
"float4 P0;\n"
@ -113,6 +183,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
"\n"
"if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n",
VB_HAS_POSMTXIDX);
LoadVertexAttribute(out, host_config, 2, "posmtx", "uint4", "ubyte4");
out.Write(" // Vertex format has a per-vertex matrix\n"
" int posidx = int(posmtx.r);\n"
" P0 = " I_TRANSFORMMATRICES "[posidx];\n"
@ -144,27 +215,40 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
"// by lighting calculations and needs to be unit length), the same transform matrix\n"
"// can do double duty, scaling for emboss mapping, and not scaling for lighting.\n"
"float3 _normal = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NORMAL\n",
"if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
"{{\n",
VB_HAS_NORMAL);
LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3");
out.Write(" _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"}}\n"
"\n"
"float3 _tangent = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_TANGENT\n",
"if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
"{{\n",
VB_HAS_TANGENT);
LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3");
out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n"
"}}\n"
"else\n"
"{{\n"
" _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT
".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n"
"}}\n"
"\n"
"float3 _binormal = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n",
"if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
"{{\n",
VB_HAS_BINORMAL);
LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3");
out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n"
"}}\n"
"else\n"
"{{\n"
" _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL
".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n"
"}}\n"
"\n");
// Hardware Lighting
@ -178,34 +262,40 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
"bool use_color_1 = ((components & {0}u) == {0}u); // VB_HAS_COL0 | VB_HAS_COL1\n",
VB_HAS_COL0 | VB_HAS_COL1);
out.Write("for (uint color = 0u; color < {}u; color++) {{\n", NUM_XF_COLOR_CHANNELS);
out.Write(" if ((color == 0u || use_color_1) && (components & ({}u << color)) != 0u) {{\n",
VB_HAS_COL0);
out.Write(" // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are "
"present.\n"
" if (color == 0u)\n"
" vertex_color_0 = rawcolor0;\n"
" else\n"
" vertex_color_1 = rawcolor1;\n"
" }} else if (color == 0u && (components & {}u) != 0u) {{\n",
VB_HAS_COL1);
out.Write(" // Use color1 for channel 0 if color0 is not present.\n"
" vertex_color_0 = rawcolor1;\n"
" }} else {{\n"
" if (color == 0u)\n"
" vertex_color_0 = missing_color_value;\n"
" else\n"
" vertex_color_1 = missing_color_value;\n"
" }}\n"
out.Write("if ((components & {0}u) == {0}u) // VB_HAS_COL0 | VB_HAS_COL1\n"
"{{\n",
VB_HAS_COL0 | VB_HAS_COL1);
LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4");
LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4");
out.Write(" vertex_color_0 = rawcolor0;\n"
" vertex_color_1 = rawcolor1;\n"
"}}\n"
"\n");
"else if ((components & {}u) != 0u) // VB_HAS_COL0\n"
"{{\n",
VB_HAS_COL0);
LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4");
out.Write(" vertex_color_0 = rawcolor0;\n"
" vertex_color_1 = rawcolor0;\n"
"}}\n"
"else if ((components & {}u) != 0u) // VB_HAS_COL1\n"
"{{\n",
VB_HAS_COL1);
LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4");
out.Write(" vertex_color_0 = rawcolor1;\n"
" vertex_color_1 = rawcolor1;\n"
"}}\n"
"else\n"
"{{\n"
" vertex_color_0 = missing_color_value;\n"
" vertex_color_1 = missing_color_value;\n"
"}}\n");
WriteVertexLighting(out, api_type, "pos.xyz", "_normal", "vertex_color_0", "vertex_color_1",
"o.colors_0", "o.colors_1");
// Texture Coordinates
if (num_texgen > 0)
GenVertexShaderTexGens(api_type, num_texgen, out);
GenVertexShaderTexGens(api_type, host_config, num_texgen, out);
if (per_pixel_lighting)
{
@ -352,7 +442,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
return out;
}
static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out)
static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config,
u32 num_texgen, ShaderCode& out)
{
// The HLSL compiler complains that the output texture coordinates are uninitialized when trying
// to dynamically index them.
@ -377,27 +468,40 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
out.Write(" coord.xyz = rawpos.xyz;\n");
out.Write(" break;\n\n");
out.Write(" case {:s}:\n", SourceRow::Normal);
out.Write(" coord.xyz = ((components & {}u /* VB_HAS_NORMAL */) != 0u) ? rawnormal.xyz : "
"coord.xyz;",
out.Write(" if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
" {{\n",
VB_HAS_NORMAL);
out.Write(" break;\n\n");
LoadVertexAttribute(out, host_config, 6, "rawnormal", "float3", "float3");
out.Write(" coord.xyz = rawnormal.xyz;\n"
" }}\n"
" break;\n\n");
out.Write(" case {:s}:\n", SourceRow::BinormalT);
out.Write(" coord.xyz = ((components & {}u /* VB_HAS_TANGENT */) != 0u) ? rawtangent.xyz : "
"coord.xyz;",
out.Write(" if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
" {{\n",
VB_HAS_TANGENT);
out.Write(" break;\n\n");
LoadVertexAttribute(out, host_config, 6, "rawtangent", "float3", "float3");
out.Write(" coord.xyz = rawtangent.xyz;\n"
" }}\n"
" break;\n\n");
out.Write(" case {:s}:\n", SourceRow::BinormalB);
out.Write(" coord.xyz = ((components & {}u /* VB_HAS_BINORMAL */) != 0u) ? rawbinormal.xyz : "
"coord.xyz;",
out.Write(" if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
" {{\n",
VB_HAS_BINORMAL);
out.Write(" break;\n\n");
LoadVertexAttribute(out, host_config, 6, "rawbinormal", "float3", "float3");
out.Write(" coord.xyz = rawbinormal.xyz;\n"
" }}\n"
" break;\n\n");
for (u32 i = 0; i < 8; i++)
{
out.Write(" case {:s}:\n", static_cast<SourceRow>(static_cast<u32>(SourceRow::Tex0) + i));
out.Write(
" coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, "
"1.0, 1.0) : coord;\n",
VB_HAS_UV0 << i, i, i, i);
out.Write(" if ((components & {}u) != 0u) // VB_HAS_UV{}\n"
" {{\n",
VB_HAS_UV0 << i, i);
LoadVertexAttribute(out, host_config, 6, fmt::format("rawtex{}", i), "float3", "rawtex",
fmt::format("rawtex[{}][{}]", i / 4, i % 4));
out.Write(" coord = float4(rawtex{}.x, rawtex{}.y, 1.0f, 1.0f);\n"
" }}\n",
i, i);
out.Write(" break;\n\n");
}
out.Write(" }}\n"
@ -447,14 +551,24 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
" {{\n");
out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
VB_HAS_TEXMTXIDX0);
out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
" // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
" int tmp = 0;\n"
" switch (texgen) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.Write(" }}\n"
"\n");
if (host_config.backend_dynamic_vertex_loader)
{
out.Write(" int tmp = int(load_input_float3_rawtex(vertex_base_offset, "
"vertex_offset_rawtex[texgen / 4][texgen % 4]).z);\n"
"\n");
}
else
{
out.Write(
" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
" // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
" int tmp = 0;\n"
" switch (texgen) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.Write(" }}\n"
"\n");
}
out.Write(" if ({} == {:s}) {{\n", BitfieldExtract<&TexMtxInfo::projection>("texMtxInfo"),
TexSize::STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
@ -514,6 +628,19 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
"}}\n");
}
// Emits the shader statement that declares a local named `name` of type `shader_type` and fills
// it by calling load_input_<shader_type>_<stored_type>() with vertex_base_offset and the
// matching vertex_offset_<...> value.
//
// code         - shader source being built; the statement is appended to it.
// host_config  - only backend_dynamic_vertex_loader is inspected.
// indent       - number of spaces placed in front of the emitted statement.
// name         - identifier of the local variable to declare.
// shader_type  - type of the declared variable; also the first part of the loader function name.
// stored_type  - second part of the loader function name.
// offset_name  - suffix used for vertex_offset_<suffix>; `name` is used when this is empty.
static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent,
                                std::string_view name, std::string_view shader_type,
                                std::string_view stored_type, std::string_view offset_name)
{
  // Without the dynamic vertex loader the attribute is a regular shader input, so there is
  // nothing to load.
  if (!host_config.backend_dynamic_vertex_loader)
    return;

  // Callers may omit offset_name, in which case the offset shares the attribute's name.
  const std::string_view offset_suffix = offset_name.empty() ? name : offset_name;

  // "{:{}}" with an empty string argument produces `indent` spaces of padding.
  code.Write("{:{}}{} {} = load_input_{}_{}(vertex_base_offset, vertex_offset_{});\n", "", indent,
             shader_type, name, shader_type, stored_type, offset_suffix);
}
void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback)
{
VertexShaderUid uid;