dolphin/Source/Core/VideoCommon/VertexLoaderBase.cpp
Pokechu22 937bb2aa2e Cache normals in addition to binormals and tangents
Fixes LIT (https://bugs.dolphin-emu.org/issues/13635). The text does not include normals, but has lighting enabled. With the previous default of (0, 0, 0), lighting was always black (as dot(X, (0, 0, 0)) is always 0). It seems like the normal from the map in the background (0, 0, 1) is re-used.

LIT also has the vertex color enabled while vertex color is not specified, the same as SMS's debug cubes; the default MissingColorValue GameINI value of solid white seems to work correctly in this case.
2024-10-12 10:32:41 -07:00

267 lines
11 KiB
C++

// Copyright 2014 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoCommon/VertexLoaderBase.h"
#include <array>
#include <bit>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
#include <fmt/format.h>
#include <fmt/ranges.h>
#include "Common/Assert.h"
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"
#ifdef _M_X86_64
#include "VideoCommon/VertexLoaderX64.h"
#elif defined(_M_ARM_64)
#include "VideoCommon/VertexLoaderARM64.h"
#endif
// a hacky implementation to compare two vertex loaders
class VertexLoaderTester : public VertexLoaderBase
{
public:
VertexLoaderTester(std::unique_ptr<VertexLoaderBase> a_, std::unique_ptr<VertexLoaderBase> b_,
const TVtxDesc& vtx_desc, const VAT& vtx_attr)
: VertexLoaderBase(vtx_desc, vtx_attr), a(std::move(a_)), b(std::move(b_))
{
ASSERT(a && b);
if (a->m_vertex_size == b->m_vertex_size && a->m_native_components == b->m_native_components &&
a->m_native_vtx_decl.stride == b->m_native_vtx_decl.stride)
{
// These are generated from the VAT and vertex desc, so they should match.
// m_native_vtx_decl.stride isn't set yet, though.
ASSERT(m_vertex_size == a->m_vertex_size && m_native_components == a->m_native_components);
memcpy(&m_native_vtx_decl, &a->m_native_vtx_decl, sizeof(PortableVertexDeclaration));
}
else
{
PanicAlertFmt("Can't compare vertex loaders that expect different vertex formats!\n"
"a: m_vertex_size {}, m_native_components {:#010x}, stride {}\n"
"b: m_vertex_size {}, m_native_components {:#010x}, stride {}",
a->m_vertex_size, a->m_native_components, a->m_native_vtx_decl.stride,
b->m_vertex_size, b->m_native_components, b->m_native_vtx_decl.stride);
}
}
int RunVertices(const u8* src, u8* dst, int count) override
{
buffer_a.resize(count * a->m_native_vtx_decl.stride + 4);
buffer_b.resize(count * b->m_native_vtx_decl.stride + 4);
const std::array<u32, 3> old_position_matrix_index_cache =
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> old_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> old_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> old_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> old_binormal_cache = VertexLoaderManager::binormal_cache;
const int count_a = a->RunVertices(src, buffer_a.data(), count);
const std::array<u32, 3> a_position_matrix_index_cache =
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> a_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> a_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> a_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> a_binormal_cache = VertexLoaderManager::binormal_cache;
// Reset state before running b
VertexLoaderManager::position_matrix_index_cache = old_position_matrix_index_cache;
VertexLoaderManager::position_cache = old_position_cache;
VertexLoaderManager::normal_cache = old_normal_cache;
VertexLoaderManager::tangent_cache = old_tangent_cache;
VertexLoaderManager::binormal_cache = old_binormal_cache;
const int count_b = b->RunVertices(src, buffer_b.data(), count);
const std::array<u32, 3> b_position_matrix_index_cache =
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> b_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> b_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> b_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> b_binormal_cache = VertexLoaderManager::binormal_cache;
ASSERT_MSG(VIDEO, count_a == count_b,
"The two vertex loaders have loaded a different amount of vertices (a: {}, b: {}).",
count_a, count_b);
ASSERT_MSG(VIDEO,
memcmp(buffer_a.data(), buffer_b.data(),
std::min(count_a, count_b) * m_native_vtx_decl.stride) == 0,
"The two vertex loaders have loaded different data. Configuration:"
"\nVertex desc:\n{}\n\nVertex attr:\n{}",
m_VtxDesc, m_VtxAttr);
ASSERT_MSG(VIDEO, a_position_matrix_index_cache == b_position_matrix_index_cache,
"Expected matching position matrix caches after loading (a: {}; b: {})",
fmt::join(a_position_matrix_index_cache, ", "),
fmt::join(b_position_matrix_index_cache, ", "));
// Some games (e.g. Donkey Kong Country Returns) have a few draws that contain NaN.
// Since NaN != NaN, we need to compare the bits instead.
const auto bit_equal = [](float val_a, float val_b) {
return std::bit_cast<u32>(val_a) == std::bit_cast<u32>(val_b);
};
// The last element is allowed to be garbage for SIMD overwrites.
// For XY, the last 2 are garbage.
const bool positions_match = [&] {
const size_t max_component = m_VtxAttr.g0.PosElements == CoordComponentCount::XYZ ? 3 : 2;
for (size_t vertex = 0; vertex < 3; vertex++)
{
if (!std::equal(a_position_cache[vertex].begin(),
a_position_cache[vertex].begin() + max_component,
b_position_cache[vertex].begin(), bit_equal))
{
return false;
}
}
return true;
}();
ASSERT_MSG(VIDEO, positions_match,
"Expected matching position caches after loading (a: {} / {} / {}; b: {} / {} / {})",
fmt::join(a_position_cache[0], ", "), fmt::join(a_position_cache[1], ", "),
fmt::join(a_position_cache[2], ", "), fmt::join(b_position_cache[0], ", "),
fmt::join(b_position_cache[1], ", "), fmt::join(b_position_cache[2], ", "));
// The last element is allowed to be garbage for SIMD overwrites
ASSERT_MSG(VIDEO,
std::equal(a_normal_cache.begin(), a_normal_cache.begin() + 3,
b_normal_cache.begin(), b_normal_cache.begin() + 3, bit_equal),
"Expected matching normal caches after loading (a: {}; b: {})",
fmt::join(a_normal_cache, ", "), fmt::join(b_normal_cache, ", "));
ASSERT_MSG(VIDEO,
std::equal(a_tangent_cache.begin(), a_tangent_cache.begin() + 3,
b_tangent_cache.begin(), b_tangent_cache.begin() + 3, bit_equal),
"Expected matching tangent caches after loading (a: {}; b: {})",
fmt::join(a_tangent_cache, ", "), fmt::join(b_tangent_cache, ", "));
ASSERT_MSG(VIDEO,
std::equal(a_binormal_cache.begin(), a_binormal_cache.begin() + 3,
b_binormal_cache.begin(), b_binormal_cache.begin() + 3, bit_equal),
"Expected matching binormal caches after loading (a: {}; b: {})",
fmt::join(a_binormal_cache, ", "), fmt::join(b_binormal_cache, ", "));
memcpy(dst, buffer_a.data(), count_a * m_native_vtx_decl.stride);
m_numLoadedVertices += count;
return count_a;
}
private:
std::unique_ptr<VertexLoaderBase> a;
std::unique_ptr<VertexLoaderBase> b;
std::vector<u8> buffer_a;
std::vector<u8> buffer_b;
};
// Returns the sum of the sizes reported for every attribute enabled by the given vertex
// descriptor / attribute table pair: matrix indices, position, normal, colors and
// texture coordinates.
u32 VertexLoaderBase::GetVertexSize(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
{
  // Each enabled TexMatIdx adds one byte, as does PosMatIdx
  u32 total = std::popcount(vtx_desc.low.Hex & 0x1FF);

  total += VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
                                          vtx_attr.g0.PosElements);

  total += VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
                                        vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3);

  for (u32 channel = 0; channel < vtx_desc.low.Color.Size(); channel++)
  {
    total +=
        VertexLoader_Color::GetSize(vtx_desc.low.Color[channel], vtx_attr.GetColorFormat(channel));
  }

  for (u32 unit = 0; unit < vtx_desc.high.TexCoord.Size(); unit++)
  {
    total += VertexLoader_TextCoord::GetSize(vtx_desc.high.TexCoord[unit],
                                             vtx_attr.GetTexFormat(unit),
                                             vtx_attr.GetTexElements(unit));
  }

  return total;
}
// Builds the VB_HAS_* bitmask describing which attributes are enabled by the given vertex
// descriptor / attribute table pair.
u32 VertexLoaderBase::GetVertexComponents(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
{
  u32 flags = 0;

  // Matrix indices: one flag for the position matrix, then one per texture matrix.
  if (vtx_desc.low.PosMatIdx)
    flags |= VB_HAS_POSMTXIDX;
  for (u32 slot = 0; slot < vtx_desc.low.TexMatIdx.Size(); slot++)
  {
    if (vtx_desc.low.TexMatIdx[slot])
      flags |= VB_HAS_TEXMTXIDX0 << slot;
  }

  // Vertices always have positions; thus there is no VB_HAS_POS as it would always be set

  // Tangent and binormal are only reported when a normal is present and it is in NTB form.
  const bool has_normal = vtx_desc.low.Normal != VertexComponentFormat::NotPresent;
  if (has_normal)
    flags |= VB_HAS_NORMAL;
  if (has_normal && vtx_attr.g0.NormalElements == NormalComponentCount::NTB)
    flags |= VB_HAS_TANGENT | VB_HAS_BINORMAL;

  // One flag per color channel that is present.
  for (u32 channel = 0; channel < vtx_desc.low.Color.Size(); channel++)
  {
    if (vtx_desc.low.Color[channel] != VertexComponentFormat::NotPresent)
      flags |= VB_HAS_COL0 << channel;
  }

  // One flag per texture coordinate set that is present.
  for (u32 unit = 0; unit < vtx_desc.high.TexCoord.Size(); unit++)
  {
    if (vtx_desc.high.TexCoord[unit] != VertexComponentFormat::NotPresent)
      flags |= VB_HAS_UV0 << unit;
  }

  return flags;
}
std::unique_ptr<VertexLoaderBase> VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc,
const VAT& vtx_attr)
{
std::unique_ptr<VertexLoaderBase> loader = nullptr;
// #define COMPARE_VERTEXLOADERS
#if defined(_M_X86_64)
loader = std::make_unique<VertexLoaderX64>(vtx_desc, vtx_attr);
#elif defined(_M_ARM_64)
loader = std::make_unique<VertexLoaderARM64>(vtx_desc, vtx_attr);
#endif
// Use the software loader as a fallback
// (not currently applicable, as both VertexLoaderX64 and VertexLoaderARM64
// are always usable, but if a loader that only works on some CPUs is created
// then this fallback would be used)
if (!loader)
loader = std::make_unique<VertexLoader>(vtx_desc, vtx_attr);
#if defined(COMPARE_VERTEXLOADERS)
return std::make_unique<VertexLoaderTester>(
std::make_unique<VertexLoader>(vtx_desc, vtx_attr), // the software one
std::move(loader), // the new one to compare
vtx_desc, vtx_attr);
#else
return loader;
#endif
}