Merge pull request #13074 from Pokechu22/normal-cache

Cache normals in addition to binormals and tangents
This commit is contained in:
Tilka 2024-10-12 19:49:48 +01:00 committed by GitHub
commit d43c6dc555
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 140 additions and 83 deletions

View File

@ -98,7 +98,7 @@ static size_t s_state_writes_in_queue;
static std::condition_variable s_state_write_queue_is_empty;
// Don't forget to increase this after doing changes on the savestate system
constexpr u32 STATE_VERSION = 168; // Last changed in PR 12639
constexpr u32 STATE_VERSION = 169; // Last changed in PR 13074
// Increase this if the StateExtendedHeader definition changes
constexpr u32 EXTENDED_HEADER_VERSION = 1; // Last changed in PR 12217

View File

@ -81,9 +81,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
// transform this vertex so that it can be used for rasterization (outVertex)
OutputVertexData* outVertex = m_setup_unit.GetVertex();
TransformUnit::TransformPosition(&m_vertex, outVertex);
outVertex->normal = {};
if (VertexLoaderManager::g_current_components & VB_HAS_NORMAL)
TransformUnit::TransformNormal(&m_vertex, outVertex);
TransformUnit::TransformNormal(&m_vertex, outVertex);
TransformUnit::TransformColor(&m_vertex, outVertex);
TransformUnit::TransformTexCoord(&m_vertex, outVertex);
@ -209,6 +207,14 @@ void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int inde
{
ReadVertexAttribute<float>(&m_vertex.normal[i][0], src, vdec.normals[i], 0, 3, false);
}
if (!vdec.normals[0].enable)
{
auto& system = Core::System::GetInstance();
auto& vertex_shader_manager = system.GetVertexShaderManager();
m_vertex.normal[0][0] = vertex_shader_manager.constants.cached_normal[0];
m_vertex.normal[0][1] = vertex_shader_manager.constants.cached_normal[1];
m_vertex.normal[0][2] = vertex_shader_manager.constants.cached_normal[2];
}
if (!vdec.normals[1].enable)
{
auto& system = Core::System::GetInstance();

View File

@ -93,6 +93,7 @@ struct alignas(16) VertexShaderConstants
// .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha
std::array<uint4, 8> xfmem_pack1;
float4 cached_normal;
float4 cached_tangent;
float4 cached_binormal;
// For UberShader vertex loader

View File

@ -283,6 +283,7 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
#define I_POSTTRANSFORMMATRICES "cpostmtx"
#define I_PIXELCENTERCORRECTION "cpixelcenter"
#define I_VIEWPORT_SIZE "cviewport"
#define I_CACHED_NORMAL "cnormal"
#define I_CACHED_TANGENT "ctangent"
#define I_CACHED_BINORMAL "cbinormal"
@ -306,6 +307,7 @@ static const char s_shader_uniforms[] = "\tuint components;\n"
"\tfloat4 " I_PIXELCENTERCORRECTION ";\n"
"\tfloat2 " I_VIEWPORT_SIZE ";\n"
"\tuint4 xfmem_pack1[8];\n"
"\tfloat4 " I_CACHED_NORMAL ";\n"
"\tfloat4 " I_CACHED_TANGENT ";\n"
"\tfloat4 " I_CACHED_BINORMAL ";\n"
"\tuint vertex_stride;\n"

View File

@ -251,47 +251,53 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{
"o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
"\n"
"float3 _rawnormal;\n"
"float3 _rawtangent;\n"
"float3 _rawbinormal;\n"
"if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_NORMAL));
LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3");
out.Write(" _rawnormal = rawnormal;\n"
"}}\n"
"else\n"
"{{\n"
" _rawnormal = " I_CACHED_NORMAL ".xyz;\n"
"}}\n"
"\n"
"if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
"{{\n",
Common::ToUnderlying(VB_HAS_TANGENT));
LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3");
out.Write(" _rawtangent = rawtangent;\n"
"}}\n"
"else\n"
"{{\n"
" _rawtangent = " I_CACHED_TANGENT ".xyz;\n"
"}}\n"
"\n"
"if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_BINORMAL));
LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3");
out.Write(" _rawbinormal = rawbinormal;\n"
"}}\n"
"else\n"
"{{\n"
" _rawbinormal = " I_CACHED_BINORMAL ".xyz;\n"
"}}\n"
"\n"
"// The scale of the transform matrix is used to control the size of the emboss map\n"
"// effect by changing the scale of the transformed binormals (which only get used by\n"
"// emboss map texgens). By normalising the first transformed normal (which is used\n"
"// by lighting calculations and needs to be unit length), the same transform matrix\n"
"// can do double duty, scaling for emboss mapping, and not scaling for lighting.\n"
"float3 _normal = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_NORMAL));
LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3");
out.Write(" _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"}}\n"
"\n"
"float3 _tangent = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
"{{\n",
Common::ToUnderlying(VB_HAS_TANGENT));
LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3");
out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n"
"}}\n"
"else\n"
"{{\n"
" _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT
".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n"
"}}\n"
"\n"
"float3 _binormal = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_BINORMAL));
LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3");
out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n"
"}}\n"
"else\n"
"{{\n"
" _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL
".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n"
"}}\n"
"\n");
"float3 _normal = normalize(float3(dot(N0, _rawnormal), dot(N1, _rawnormal), dot(N2, "
"_rawnormal)));\n"
"float3 _tangent = float3(dot(N0, _rawtangent), dot(N1, _rawtangent), dot(N2, "
"_rawtangent));\n"
"float3 _binormal = float3(dot(N0, _rawbinormal), dot(N1, _rawbinormal), dot(N2, "
"_rawbinormal));\n");
// Hardware Lighting
out.Write("// xfmem.numColorChans controls the number of color channels available to TEV,\n"

View File

@ -164,6 +164,13 @@ void VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentFor
m_float_emit.STR(128, coords, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[0])
{
FixupBranch dont_store = CBNZ(remaining_reg);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::normal_cache.data());
m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[1])
{
FixupBranch dont_store = CBNZ(remaining_reg);

View File

@ -68,6 +68,7 @@ public:
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> old_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> old_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> old_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> old_binormal_cache = VertexLoaderManager::binormal_cache;
@ -77,12 +78,14 @@ public:
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> a_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> a_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> a_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> a_binormal_cache = VertexLoaderManager::binormal_cache;
// Reset state before running b
VertexLoaderManager::position_matrix_index_cache = old_position_matrix_index_cache;
VertexLoaderManager::position_cache = old_position_cache;
VertexLoaderManager::normal_cache = old_normal_cache;
VertexLoaderManager::tangent_cache = old_tangent_cache;
VertexLoaderManager::binormal_cache = old_binormal_cache;
@ -92,6 +95,7 @@ public:
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> b_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> b_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> b_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> b_binormal_cache = VertexLoaderManager::binormal_cache;
@ -140,6 +144,12 @@ public:
fmt::join(b_position_cache[1], ", "), fmt::join(b_position_cache[2], ", "));
// The last element is allowed to be garbage for SIMD overwrites
ASSERT_MSG(VIDEO,
std::equal(a_normal_cache.begin(), a_normal_cache.begin() + 3,
b_normal_cache.begin(), b_normal_cache.begin() + 3, bit_equal),
"Expected matching normal caches after loading (a: {}; b: {})",
fmt::join(a_normal_cache, ", "), fmt::join(b_normal_cache, ", "));
ASSERT_MSG(VIDEO,
std::equal(a_tangent_cache.begin(), a_tangent_cache.begin() + 3,
b_tangent_cache.begin(), b_tangent_cache.begin() + 3, bit_equal),

View File

@ -40,6 +40,7 @@ namespace VertexLoaderManager
std::array<u32, 3> position_matrix_index_cache;
// 3 vertices, 4 floats each to allow SIMD overwrite
alignas(sizeof(std::array<float, 4>)) std::array<std::array<float, 4>, 3> position_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> normal_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> tangent_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> binormal_cache;

View File

@ -62,6 +62,10 @@ void UpdateVertexArrayPointers();
// These arrays are in reverse order.
extern std::array<std::array<float, 4>, 3> position_cache;
extern std::array<u32, 3> position_matrix_index_cache;
// Needed for the game "LIT", which has text that has lighting enabled, but doesn't have normal
// vectors. The normals from the last drawn object are used instead.
// See https://bugs.dolphin-emu.org/issues/13635
extern std::array<float, 4> normal_cache;
// Store the tangent and binormal vectors for games that use emboss texgens when the vertex format
// doesn't include them (e.g. RS2 and RS3). These too are 4 floats each for SIMD overwrites.
extern std::array<float, 4> tangent_cache;

View File

@ -137,6 +137,14 @@ void VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute,
MOVUPS(MPIC(VertexLoaderManager::position_cache.data(), scratch3, SCALE_4), coords);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[0])
{
TEST(32, R(remaining_reg), R(remaining_reg));
FixupBranch dont_store = J_CC(CC_NZ);
// For similar reasons, the cached normal is stored as 4 floats
MOVUPS(MPIC(VertexLoaderManager::normal_cache.data()), coords);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[1])
{
TEST(32, R(remaining_reg), R(remaining_reg));

View File

@ -49,7 +49,9 @@ void ReadIndirect(VertexLoader* loader, const T* data)
const float value = FracAdjust(Common::FromBigEndian(data[i]));
if (loader->m_remaining == 0)
{
if (i >= 3 && i < 6)
if (i < 3)
VertexLoaderManager::normal_cache[i] = value;
else if (i >= 3 && i < 6)
VertexLoaderManager::tangent_cache[i - 3] = value;
else if (i >= 6 && i < 9)
VertexLoaderManager::binormal_cache[i - 6] = value;

View File

@ -558,7 +558,7 @@ void VertexManagerBase::Flush()
pixel_shader_manager.constants.time_ms = seconds_elapsed * 1000;
}
CalculateBinormals(VertexLoaderManager::GetCurrentVertexFormat());
CalculateNormals(VertexLoaderManager::GetCurrentVertexFormat());
// Calculate ZSlope for zfreeze
const auto used_textures = UsedTextures();
std::vector<std::string> texture_names;
@ -699,6 +699,7 @@ void VertexManagerBase::DoState(PointerWrap& p)
}
p.Do(m_zslope);
p.Do(VertexLoaderManager::normal_cache);
p.Do(VertexLoaderManager::tangent_cache);
p.Do(VertexLoaderManager::binormal_cache);
}
@ -769,7 +770,7 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
m_zslope.dirty = true;
}
void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format)
void VertexManagerBase::CalculateNormals(NativeVertexFormat* format)
{
const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();
@ -794,6 +795,16 @@ void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format)
vertex_shader_manager.constants.cached_binormal = VertexLoaderManager::binormal_cache;
vertex_shader_manager.dirty = true;
}
if (vert_decl.normals[0].enable)
return;
VertexLoaderManager::normal_cache[3] = 0;
if (vertex_shader_manager.constants.cached_normal != VertexLoaderManager::normal_cache)
{
vertex_shader_manager.constants.cached_normal = VertexLoaderManager::normal_cache;
vertex_shader_manager.dirty = true;
}
}
void VertexManagerBase::UpdatePipelineConfig()

View File

@ -192,7 +192,7 @@ protected:
u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;
void CalculateZSlope(NativeVertexFormat* format);
void CalculateBinormals(NativeVertexFormat* format);
void CalculateNormals(NativeVertexFormat* format);
BitSet32 UsedTextures() const;

View File

@ -312,56 +312,43 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("int posidx = int(posmtx.r);\n"
"float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n"
"float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n"
"float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
out.Write("int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
"float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
"float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
}
"float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n"
"int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
"float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
"float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
}
else
{
// One shared matrix
out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n"
"float4 P1 = " I_POSNORMALMATRIX "[1];\n"
"float4 P2 = " I_POSNORMALMATRIX "[2];\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
out.Write("float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
"float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
"float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
}
"float4 P2 = " I_POSNORMALMATRIX "[2];\n"
"float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
"float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
"float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
}
out.Write("// Multiply the position vector by the position matrix\n"
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
if ((uid_data->components & VB_HAS_TANGENT) == 0)
out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
if ((uid_data->components & VB_HAS_BINORMAL) == 0)
out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
if ((uid_data->components & VB_HAS_NORMAL) == 0)
out.Write("float3 rawnormal = " I_CACHED_NORMAL ".xyz;\n");
if ((uid_data->components & VB_HAS_TANGENT) == 0)
out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
if ((uid_data->components & VB_HAS_BINORMAL) == 0)
out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
// The scale of the transform matrix is used to control the size of the emboss map effect, by
// changing the scale of the transformed binormals (which only get used by emboss map texgens).
// By normalising the first transformed normal (which is used by lighting calculations and needs
// to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
// and not scaling for lighting.
out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
"rawtangent));\n"
"float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n");
}
else
{
out.Write("float3 _normal = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _binormal = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _tangent = float3(0.0, 0.0, 0.0);\n");
}
// The scale of the transform matrix is used to control the size of the emboss map effect, by
// changing the scale of the transformed binormals (which only get used by emboss map texgens).
// By normalising the first transformed normal (which is used by lighting calculations and needs
// to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
// and not scaling for lighting.
out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
"rawtangent));\n"
"float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n");
out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");

View File

@ -702,6 +702,7 @@ TEST_P(VertexLoaderNormalTest, NormalAll)
input_with_expected_type(i / 32.f);
// Pre-fill these values to detect if they're modified
VertexLoaderManager::normal_cache = {-42.f, -43.f, -44.f, -45.f};
VertexLoaderManager::binormal_cache = {42.f, 43.f, 44.f, 45.f};
VertexLoaderManager::tangent_cache = {46.f, 47.f, 48.f, 49.f};
@ -738,6 +739,9 @@ TEST_P(VertexLoaderNormalTest, NormalAll)
ExpectOut(10 / 32.f);
ExpectOut(11 / 32.f);
ExpectOut(12 / 32.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[0], 10 / 32.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[1], 11 / 32.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[2], 12 / 32.f);
if (elements == NormalComponentCount::NTB)
{
// Tangent
@ -759,6 +763,14 @@ TEST_P(VertexLoaderNormalTest, NormalAll)
}
}
if (addr == VertexComponentFormat::NotPresent)
{
// Expect these to not be written
EXPECT_EQ(VertexLoaderManager::normal_cache[0], -42.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[1], -43.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[2], -44.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[3], -45.f);
}
if (addr == VertexComponentFormat::NotPresent || elements == NormalComponentCount::N)
{
// Expect these to not be written