diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 098cf1f288..1d55d7bf3c 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -575,6 +575,102 @@ struct VAT return 0; } } + void SetTexElements(size_t idx, TexComponentCount value) + { + switch (idx) + { + case 0: + g0.Tex0CoordElements = value; + return; + case 1: + g1.Tex1CoordElements = value; + return; + case 2: + g1.Tex2CoordElements = value; + return; + case 3: + g1.Tex3CoordElements = value; + return; + case 4: + g1.Tex4CoordElements = value; + return; + case 5: + g2.Tex5CoordElements = value; + return; + case 6: + g2.Tex6CoordElements = value; + return; + case 7: + g2.Tex7CoordElements = value; + return; + default: + PanicAlertFmt("Invalid tex coord index {}", idx); + } + } + void SetTexFormat(size_t idx, ComponentFormat value) + { + switch (idx) + { + case 0: + g0.Tex0CoordFormat = value; + return; + case 1: + g1.Tex1CoordFormat = value; + return; + case 2: + g1.Tex2CoordFormat = value; + return; + case 3: + g1.Tex3CoordFormat = value; + return; + case 4: + g1.Tex4CoordFormat = value; + return; + case 5: + g2.Tex5CoordFormat = value; + return; + case 6: + g2.Tex6CoordFormat = value; + return; + case 7: + g2.Tex7CoordFormat = value; + return; + default: + PanicAlertFmt("Invalid tex coord index {}", idx); + } + } + void SetTexFrac(size_t idx, u8 value) + { + switch (idx) + { + case 0: + g0.Tex0Frac = value; + return; + case 1: + g1.Tex1Frac = value; + return; + case 2: + g1.Tex2Frac = value; + return; + case 3: + g1.Tex3Frac = value; + return; + case 4: + g2.Tex4Frac = value; + return; + case 5: + g2.Tex5Frac = value; + return; + case 6: + g2.Tex6Frac = value; + return; + case 7: + g2.Tex7Frac = value; + return; + default: + PanicAlertFmt("Invalid tex coord index {}", idx); + } + } }; template <> struct fmt::formatter diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index c2d3c1bd29..402a5efaf8 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -77,9 +77,6 @@ VertexLoader::VertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr) void VertexLoader::CompileVertexTranslator() { - // Reset pipeline - m_numPipelineStages = 0; - // Position in pc vertex format. int nat_offset = 0; @@ -149,9 +146,16 @@ void VertexLoader::CompileVertexTranslator() VertexLoader_Color::GetFunction(m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i)); if (pFunc != nullptr) + { WriteCall(pFunc); + } else + { ASSERT(m_VtxDesc.low.Color[i] == VertexComponentFormat::NotPresent); + // Keep colIndex in sync if color 0 is absent but color 1 is present + if (i == 0 && m_VtxDesc.low.Color[1] != VertexComponentFormat::NotPresent) + WriteCall(VertexLoader_Color::GetDummyFunction()); + } if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent) { @@ -213,12 +217,13 @@ void VertexLoader::CompileVertexTranslator() { // if there's more tex coords later, have to write a dummy call bool has_more = false; - for (size_t j = 0; j < m_VtxDesc.high.TexCoord.Size(); ++j) + for (size_t j = i + 1; j < m_VtxDesc.high.TexCoord.Size(); ++j) { if (m_VtxDesc.high.TexCoord[j] != VertexComponentFormat::NotPresent) { has_more = true; - WriteCall(VertexLoader_TextCoord::GetDummyFunction()); // important to get indices right! + // Keep tcIndex in sync so that the correct array is used later + WriteCall(VertexLoader_TextCoord::GetDummyFunction()); break; } else if (m_VtxDesc.low.TexMatIdx[j]) @@ -245,7 +250,7 @@ void VertexLoader::CompileVertexTranslator() void VertexLoader::WriteCall(TPipelineFunction func) { - m_PipelineStages[m_numPipelineStages++] = func; + m_PipelineStages.push_back(func); } int VertexLoader::RunVertices(const u8* src, u8* dst, int count) @@ -261,8 +266,8 @@ int VertexLoader::RunVertices(const u8* src, u8* dst, int count) m_tcIndex = 0; m_colIndex = 0; m_texmtxwrite = m_texmtxread = 0; - for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](this); + for (TPipelineFunction& func : m_PipelineStages) + func(this); PRIM_LOG("\n"); } diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index d1417401a4..478b186bc8 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -9,6 +9,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/SmallVector.h" #include "VideoCommon/VertexLoaderBase.h" class VertexLoader; @@ -38,8 +39,11 @@ public: private: // Pipeline. - TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower. - int m_numPipelineStages; + // 1 pos matrix + 8 texture matrices + 1 position + 1 normal or normal/binormal/tangent + // + 2 colors + 8 texture coordinates or dummy texture coordinates + 8 texture matrices + // merged into texture coordinates + 1 skip gives a maximum of 30 + // (Tested by VertexLoaderTest.LargeFloatVertexSpeed) + Common::SmallVector m_PipelineStages; void CompileVertexTranslator(); diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index fab4177fca..87cd19063a 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -16,6 +16,11 @@ namespace { +void Color_Read_Dummy(VertexLoader* loader) +{ + loader->m_colIndex++; +} + constexpr u32 alpha_mask = 0xFF000000; void SetCol(VertexLoader* loader, u32 val) @@ -201,3 +206,8 @@ TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, Co } return s_table_read_color[type][format]; } + +TPipelineFunction VertexLoader_Color::GetDummyFunction() +{ + return Color_Read_Dummy; +} diff --git a/Source/Core/VideoCommon/VertexLoader_Color.h b/Source/Core/VideoCommon/VertexLoader_Color.h index 0e66bfbf86..da56d61212 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.h +++ b/Source/Core/VideoCommon/VertexLoader_Color.h @@ -25,6 +25,9 @@ public: static TPipelineFunction GetFunction(VertexComponentFormat type, ColorFormat format); + // It is important to synchronize colIndex, or else the wrong color array will be used + static TPipelineFunction GetDummyFunction(); + private: template using EnumMap = typename Common::EnumMap; diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.h b/Source/Core/VideoCommon/VertexLoader_TextCoord.h index 9041185490..c71688e1e2 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.h +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.h @@ -22,7 +22,7 @@ public: static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format, TexComponentCount elements); - // It is important to synchronize tcIndex. + // It is important to synchronize tcIndex, or else the wrong texture coordinate array will be used static TPipelineFunction GetDummyFunction(); private: diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index c554db88ef..f467fcd23b 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -759,6 +759,284 @@ TEST_P(VertexLoaderNormalTest, NormalAll) } } +class VertexLoaderSkippedColorsTest : public VertexLoaderTest, + public ::testing::WithParamInterface> +{ +}; +INSTANTIATE_TEST_SUITE_P(AllCombinations, VertexLoaderSkippedColorsTest, + ::testing::Combine(::testing::Values(false, true), + ::testing::Values(false, true))); + +TEST_P(VertexLoaderSkippedColorsTest, SkippedColors) +{ + bool enable_color_0, enable_color_1; + std::tie(enable_color_0, enable_color_1) = GetParam(); + + size_t input_size = 1; + size_t output_size = 3 * sizeof(float); + size_t color_0_offset = 0; + size_t color_1_offset = 0; + + m_vtx_desc.low.Position = VertexComponentFormat::Index8; + if (enable_color_0) + { + m_vtx_desc.low.Color0 = VertexComponentFormat::Index8; + input_size++; + color_0_offset = output_size; + output_size += sizeof(u32); + } + if (enable_color_1) + { + m_vtx_desc.low.Color1 = VertexComponentFormat::Index8; + input_size++; + color_1_offset = output_size; + output_size += sizeof(u32); + } + + m_vtx_attr.g0.PosElements = CoordComponentCount::XYZ; + m_vtx_attr.g0.PosFormat = ComponentFormat::Float; + m_vtx_attr.g0.Color0Elements = ColorComponentCount::RGBA; + m_vtx_attr.g0.Color0Comp = ColorFormat::RGBA8888; + m_vtx_attr.g0.Color1Elements = ColorComponentCount::RGBA; + m_vtx_attr.g0.Color1Comp = ColorFormat::RGBA8888; + + CreateAndCheckSizes(input_size, output_size); + + // Vertex 0 + Input(1); + if (enable_color_0) + Input(1); + if (enable_color_1) + Input(1); + // Vertex 1 + Input(0); + if (enable_color_0) + Input(0); + if (enable_color_1) + Input(0); + // Position array + VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Position] = + sizeof(float); // so 1, 2, 3 for index 0; 2, 3, 4 for index 1 + Input(1.f); + Input(2.f); + Input(3.f); + Input(4.f); + // Color array 0 + VertexLoaderManager::cached_arraybases[CPArray::Color0] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Color0] = sizeof(u32); + Input(0x00010203u); + Input(0x04050607u); + // Color array 1 + VertexLoaderManager::cached_arraybases[CPArray::Color1] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Color1] = sizeof(u32); + Input(0x08090a0bu); + Input(0x0c0d0e0fu); + + ASSERT_EQ(m_loader->m_native_vtx_decl.colors[0].enable, enable_color_0); + if (enable_color_0) + ASSERT_EQ(m_loader->m_native_vtx_decl.colors[0].offset, color_0_offset); + ASSERT_EQ(m_loader->m_native_vtx_decl.colors[1].enable, enable_color_1); + if (enable_color_1) + ASSERT_EQ(m_loader->m_native_vtx_decl.colors[1].offset, color_1_offset); + + RunVertices(2); + // Vertex 0 + ExpectOut(2); + ExpectOut(3); + ExpectOut(4); + if (enable_color_0) + EXPECT_EQ((m_dst.Read()), 0x04050607u); + if (enable_color_1) + EXPECT_EQ((m_dst.Read()), 0x0c0d0e0fu); + // Vertex 1 + ExpectOut(1); + ExpectOut(2); + ExpectOut(3); + if (enable_color_0) + EXPECT_EQ((m_dst.Read()), 0x00010203u); + if (enable_color_1) + EXPECT_EQ((m_dst.Read()), 0x08090a0bu); +} + +class VertexLoaderSkippedTexCoordsTest : public VertexLoaderTest, + public ::testing::WithParamInterface +{ +public: + static constexpr u32 NUM_COMPONENTS_TO_TEST = 3; + static constexpr u32 NUM_PARAMETERS_PER_COMPONENT = 3; + static constexpr u32 NUM_COMBINATIONS = + 1 << (NUM_COMPONENTS_TO_TEST * NUM_PARAMETERS_PER_COMPONENT); +}; +INSTANTIATE_TEST_SUITE_P(AllCombinations, VertexLoaderSkippedTexCoordsTest, + ::testing::Range(0u, VertexLoaderSkippedTexCoordsTest::NUM_COMBINATIONS)); + +TEST_P(VertexLoaderSkippedTexCoordsTest, SkippedTextures) +{ + std::array enable_tex, enable_matrix, use_st; + const u32 param = GetParam(); + for (u32 component = 0; component < NUM_COMPONENTS_TO_TEST; component++) + { + const u32 bits = param >> (component * NUM_PARAMETERS_PER_COMPONENT); + enable_tex[component] = (bits & 1); + enable_matrix[component] = (bits & 2); + use_st[component] = (bits & 4); + } + + size_t input_size = 1; + size_t output_size = 3 * sizeof(float); + + std::array component_enabled{}; + std::array component_offset{}; + + m_vtx_desc.low.Position = VertexComponentFormat::Index8; + m_vtx_attr.g0.PosElements = CoordComponentCount::XYZ; + m_vtx_attr.g0.PosFormat = ComponentFormat::Float; + + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + if (enable_matrix[i] || enable_tex[i]) + { + component_enabled[i] = true; + component_offset[i] = output_size; + if (enable_matrix[i]) + { + output_size += 3 * sizeof(float); + } + else + { + if (use_st[i]) + { + output_size += 2 * sizeof(float); + } + else + { + output_size += sizeof(float); + } + } + } + if (enable_matrix[i]) + { + m_vtx_desc.low.TexMatIdx[i] = enable_matrix[i]; + input_size++; + } + if (enable_tex[i]) + { + m_vtx_desc.high.TexCoord[i] = VertexComponentFormat::Index8; + input_size++; + } + + m_vtx_attr.SetTexElements(i, use_st[i] ? TexComponentCount::ST : TexComponentCount::S); + m_vtx_attr.SetTexFormat(i, ComponentFormat::Float); + m_vtx_attr.SetTexFrac(i, 0); + } + + CreateAndCheckSizes(input_size, output_size); + + // Vertex 0 + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + if (enable_matrix[i]) + Input(u8(20 + i)); + } + Input(1); // Position + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + if (enable_tex[i]) + Input(1); + } + // Vertex 1 + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + if (enable_matrix[i]) + Input(u8(10 + i)); + } + Input(0); // Position + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + if (enable_tex[i]) + Input(0); + } + // Position array + VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Position] = + sizeof(float); // so 1, 2, 3 for index 0; 2, 3, 4 for index 1 + Input(1.f); + Input(2.f); + Input(3.f); + Input(4.f); + // Texture coord arrays + for (u8 i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + VertexLoaderManager::cached_arraybases[CPArray::TexCoord0 + i] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::TexCoord0 + i] = 2 * sizeof(float); + Input(i * 100 + 11); + Input(i * 100 + 12); + Input(i * 100 + 21); + Input(i * 100 + 22); + } + + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + ASSERT_EQ(m_loader->m_native_vtx_decl.texcoords[i].enable, component_enabled[i]); + if (component_enabled[i]) + ASSERT_EQ(m_loader->m_native_vtx_decl.texcoords[i].offset, component_offset[i]); + } + + RunVertices(2); + + // Vertex 0 + ExpectOut(2); + ExpectOut(3); + ExpectOut(4); + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + size_t num_read = 0; + if (enable_tex[i]) + { + ExpectOut(i * 100 + 21); + num_read++; + if (use_st[i]) + { + ExpectOut(i * 100 + 22); + num_read++; + } + } + if (enable_matrix[i]) + { + // With a matrix there are always 3 components; otherwise-unused components should be 0 + while (num_read++ < 2) + ExpectOut(0); + ExpectOut(20 + i); + } + } + // Vertex 1 + ExpectOut(1); + ExpectOut(2); + ExpectOut(3); + for (size_t i = 0; i < NUM_COMPONENTS_TO_TEST; i++) + { + size_t num_read = 0; + if (enable_tex[i]) + { + ExpectOut(i * 100 + 11); + num_read++; + if (use_st[i]) + { + ExpectOut(i * 100 + 12); + num_read++; + } + } + if (enable_matrix[i]) + { + // With a matrix there are always 3 components; otherwise-unused components should be 0 + while (num_read++ < 2) + ExpectOut(0); + ExpectOut(10 + i); + } + } +} + // For gtest, which doesn't know about our fmt::formatters by default static void PrintTo(const VertexComponentFormat& t, std::ostream* os) {