From 327126d1e86c5b923aa4f0238b9ac983d1b761d9 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 18 Nov 2021 12:56:25 -0800 Subject: [PATCH 01/23] ShaderGenCommon: Add WriteSwitch --- Source/Core/VideoCommon/ShaderGenCommon.h | 61 ++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 367a472294..88a303356b 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -13,10 +13,11 @@ #include "Common/BitField.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/StringUtil.h" #include "Common/TypeUtils.h" -enum class APIType; +#include "VideoCommon/VideoCommon.h" /** * Common interface for classes that need to go through the shader generation path @@ -210,6 +211,64 @@ std::string BitfieldExtract(std::string_view source) static_cast(BitFieldT::NumBits())); } +template <auto last_member, typename = decltype(last_member)> +void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable, + const Common::EnumMap<std::string_view, last_member>& values, int indent, + bool break_) +{ + const bool make_switch = (ApiType == APIType::D3D); + + // The second template argument is needed to avoid compile errors from ambiguity with multiple + // enums with the same number of members in GCC prior to 8.
See https://godbolt.org/z/xcKaW1seW + // and https://godbolt.org/z/hz7Yqq1P5 + using enum_type = decltype(last_member); + + // {:{}} is used to indent by formatting an empty string with a variable width + if (make_switch) + { + out.Write("{:{}}switch ({}) {{\n", "", indent, variable); + for (u32 i = 0; i <= static_cast<u32>(last_member); i++) + { + const enum_type key = static_cast<enum_type>(i); + + // Assumes existence of an EnumFormatter + out.Write("{:{}}case {:s}:\n", "", indent, key); + // Note that this indentation behaves poorly for multi-line code + if (!values[key].empty()) + out.Write("{:{}} {}\n", "", indent, values[key]); + if (break_) + out.Write("{:{}} break;\n", "", indent); + } + out.Write("{:{}}}}\n", "", indent); + } + else + { + // Generate a tree of if statements recursively + // std::function must be used because auto won't capture before initialization and thus can't be + // used recursively + std::function<void(u32, u32, u32)> BuildTree = [&](u32 cur_indent, u32 low, u32 high) { + // Each generated statement is for low <= x < high + if (high == low + 1) + { + // Down to 1 case (low <= x < low + 1 means x == low) + const enum_type key = static_cast<enum_type>(low); + // Note that this indentation behaves poorly for multi-line code + out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key); + } + else + { + u32 mid = low + ((high - low) / 2); + out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid); + BuildTree(cur_indent + 2, low, mid); + out.Write("{:{}}}} else {{\n", "", cur_indent); + BuildTree(cur_indent + 2, mid, high); + out.Write("{:{}}}}\n", "", cur_indent); + } + }; + BuildTree(indent, 0, static_cast<u32>(last_member) + 1); + } +} + // Constant variable names #define I_COLORS "color" #define I_KCOLORS "k" From 2b1d1038a6acf0acf75b03ca93c85de6fe6cdf18 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 25 Apr 2021 16:04:13 -0700 Subject: [PATCH 02/23] VertexLoader: Convert to EnumMap --- .../Core/VideoCommon/VertexLoader_Color.cpp | 50 +++--
.../Core/VideoCommon/VertexLoader_Normal.cpp | 181 +++++++-------- Source/Core/VideoCommon/VertexLoader_Normal.h | 4 +- .../VideoCommon/VertexLoader_Position.cpp | 212 ++++++++---------- .../VideoCommon/VertexLoader_TextCoord.cpp | 210 ++++++++--------- 5 files changed, 296 insertions(+), 361 deletions(-) diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 4f2e04b0b4..4e71889bbd 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -6,6 +6,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/MsgHandler.h" #include "Common/Swap.h" @@ -175,21 +176,40 @@ void Color_ReadDirect_32b_8888(VertexLoader* loader) SetCol(loader, DataReadU32Unswapped()); } -constexpr TPipelineFunction s_table_read_color[4][6] = { - {nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}, - {Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x, - Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}, - {Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, - Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, Color_ReadIndex_32b_8888}, - {Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, - Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, Color_ReadIndex_32b_8888}, +using Common::EnumMap; + +// These functions are to work around a "too many initializer values" error with nested brackets +// C++ does not let you write std::array<std::array<int, 2>, 2> a = {{1, 2}, {3, 4}} +// (although it does allow std::array<std::array<int, 2>, 2> b = {1, 2, 3, 4}) +constexpr EnumMap +f(EnumMap in) +{ + return in; +} +constexpr EnumMap g(EnumMap in) +{ + return in; +} + +template +using Table = EnumMap, VertexComponentFormat::Index16>; + +constexpr Table s_table_read_color = { + f({nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}), + f({Color_ReadDirect_16b_565,
Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x, + Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}), + f({Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, + Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, Color_ReadIndex_32b_8888}), + f({Color_ReadIndex_16b_565, Color_ReadIndex_24b_888, Color_ReadIndex_32b_888x, + Color_ReadIndex_16b_4444, Color_ReadIndex_24b_6666, + Color_ReadIndex_32b_8888}), }; -constexpr u32 s_table_read_color_vertex_size[4][6] = { - {0, 0, 0, 0, 0, 0}, - {2, 3, 4, 2, 3, 4}, - {1, 1, 1, 1, 1, 1}, - {2, 2, 2, 2, 2, 2}, +constexpr Table s_table_read_color_vertex_size = { + g({0u, 0u, 0u, 0u, 0u, 0u}), + g({2u, 3u, 4u, 2u, 3u, 4u}), + g({1u, 1u, 1u, 1u, 1u, 1u}), + g({2u, 2u, 2u, 2u, 2u, 2u}), }; } // Anonymous namespace @@ -200,7 +220,7 @@ u32 VertexLoader_Color::GetSize(VertexComponentFormat type, ColorFormat format) PanicAlertFmt("Invalid color format {}", format); return 0; } - return s_table_read_color_vertex_size[u32(type)][u32(format)]; + return s_table_read_color_vertex_size[type][format]; } TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, ColorFormat format) @@ -210,5 +230,5 @@ TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, Co PanicAlertFmt("Invalid color format {}", format); return nullptr; } - return s_table_read_color[u32(type)][u32(format)]; + return s_table_read_color[type][format]; } diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index a69f78c887..f19f27eda3 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -7,6 +7,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/VertexLoader.h" @@ -98,39 +99,6 @@ struct Normal_Index_Indices3 static constexpr u32 size = sizeof(I) * 3; }; -enum NormalType -{ - 
NRM_NOT_PRESENT = 0, - NRM_DIRECT = 1, - NRM_INDEX8 = 2, - NRM_INDEX16 = 3, - NUM_NRM_TYPE -}; - -enum NormalFormat -{ - FORMAT_UBYTE = 0, - FORMAT_BYTE = 1, - FORMAT_USHORT = 2, - FORMAT_SHORT = 3, - FORMAT_FLOAT = 4, - NUM_NRM_FORMAT -}; - -enum NormalElements -{ - NRM_NBT = 0, - NRM_NBT3 = 1, - NUM_NRM_ELEMENTS -}; - -enum NormalIndices -{ - NRM_INDICES1 = 0, - NRM_INDICES3 = 1, - NUM_NRM_INDICES -}; - struct Set { template @@ -145,83 +113,88 @@ struct Set TPipelineFunction function; }; -using Formats = std::array; -using Elements = std::array; -using Indices = std::array; -using Types = std::array; +using Common::EnumMap; +using Formats = EnumMap; +using Elements = EnumMap; +using Indices = std::array; +using Types = EnumMap; constexpr Types InitializeTable() { Types table{}; - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + using VCF = VertexComponentFormat; + using NCC = NormalComponentCount; + using FMT = ComponentFormat; - // Same as above - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct(); - 
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); - table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::UByte] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::UShort] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::Short] = Normal_Direct(); + table[VCF::Direct][false][NCC::N][FMT::Float] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::UByte] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::UShort] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::Short] = Normal_Direct(); + table[VCF::Direct][false][NCC::NBT][FMT::Float] = Normal_Direct(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + // Same as above, since there are no indices + table[VCF::Direct][true][NCC::N][FMT::UByte] = 
Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::UShort] = Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::Short] = Normal_Direct(); + table[VCF::Direct][true][NCC::N][FMT::Float] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::UByte] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::Byte] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::UShort] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::Short] = Normal_Direct(); + table[VCF::Direct][true][NCC::NBT][FMT::Float] = Normal_Direct(); - // Same as above for NRM_NBT - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); - table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); + table[VCF::Index8][false][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::Byte] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index8][false][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::UByte] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::Byte] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::UShort] = Normal_Index(); + 
table[VCF::Index8][false][NCC::NBT][FMT::Short] = Normal_Index(); + table[VCF::Index8][false][NCC::NBT][FMT::Float] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + // Same for NormalComponentCount::N; differs for NBT + table[VCF::Index8][true][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::Byte] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index8][true][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index8][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3(); + table[VCF::Index8][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3(); - // Same as above for NRM_NBT - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index(); - 
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); - table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); + table[VCF::Index16][false][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index16][false][NCC::N][FMT::Byte] = Normal_Index(); + table[VCF::Index16][false][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index16][false][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index16][false][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::UByte] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::Byte] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::UShort] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::Short] = Normal_Index(); + table[VCF::Index16][false][NCC::NBT][FMT::Float] = Normal_Index(); + + // Same for NormalComponentCount::N; differs for NBT + table[VCF::Index16][true][NCC::N][FMT::UByte] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::Byte] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::UShort] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::Short] = Normal_Index(); + table[VCF::Index16][true][NCC::N][FMT::Float] = Normal_Index(); + table[VCF::Index16][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3(); + table[VCF::Index16][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3(); return table; } @@ -230,14 +203,14 @@ 
constexpr Types s_table = InitializeTable(); } // Anonymous namespace u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3) + NormalComponentCount elements, bool index3) { - return s_table[u32(type)][index3][u32(elements)][u32(format)].gc_size; + return s_table[type][index3][elements][format].gc_size; } TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3) + NormalComponentCount elements, bool index3) { - return s_table[u32(type)][index3][u32(elements)][u32(format)].function; + return s_table[type][index3][elements][format].function; } diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.h b/Source/Core/VideoCommon/VertexLoader_Normal.h index f416c590c9..30674159fb 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.h +++ b/Source/Core/VideoCommon/VertexLoader_Normal.h @@ -14,8 +14,8 @@ class VertexLoader_Normal { public: static u32 GetSize(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3); + NormalComponentCount elements, bool index3); static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format, - NormalComponentCount elements, u32 index3); + NormalComponentCount elements, bool index3); }; diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 273cccebc3..37b15de53c 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -7,6 +7,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Swap.h" #include "VideoCommon/DataReader.h" @@ -76,138 +77,109 @@ void Pos_ReadIndex(VertexLoader* loader) LOG_VTX(); } -constexpr TPipelineFunction s_table_read_position[4][8][2] = { - { - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - 
nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - }, - { - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - { - Pos_ReadDirect, - Pos_ReadDirect, - }, - }, - { - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - }, - { - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - { - Pos_ReadIndex, - Pos_ReadIndex, - }, - }, +using Common::EnumMap; + +// These functions are to work around a "too many initializer values" error with nested brackets +// C++ does not let you write std::array, 2> a = {{1, 2}, {3, 4}} +// (although it does allow std::array, 2> b = {1, 2, 3, 4}) +constexpr EnumMap e(TPipelineFunction xy, + TPipelineFunction xyz) +{ + return {xy, xyz}; +} +constexpr EnumMap e(u32 xy, u32 xyz) +{ + return {xy, xyz}; +} + +constexpr EnumMap, ComponentFormat::Float> +f(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +constexpr EnumMap, ComponentFormat::Float> +g(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +template +using Table = EnumMap, ComponentFormat::Float>, + VertexComponentFormat::Index16>; + +constexpr Table s_table_read_position = { + f({ + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + }), + f({ + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + e(Pos_ReadDirect, Pos_ReadDirect), + }), + f({ + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + 
e(Pos_ReadIndex, Pos_ReadIndex), + }), + f({ + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + e(Pos_ReadIndex, Pos_ReadIndex), + }), }; -constexpr u32 s_table_read_position_vertex_size[4][8][2] = { - { - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - }, - { - {2, 3}, - {2, 3}, - {4, 6}, - {4, 6}, - {8, 12}, - }, - { - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - }, - { - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - }, +constexpr Table s_table_read_position_vertex_size = { + g({ + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + }), + g({ + e(2, 3), + e(2, 3), + e(4, 6), + e(4, 6), + e(8, 12), + }), + g({ + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + }), + g({ + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + }), }; } // Anonymous namespace u32 VertexLoader_Position::GetSize(VertexComponentFormat type, ComponentFormat format, CoordComponentCount elements) { - return s_table_read_position_vertex_size[u32(type)][u32(format)][u32(elements)]; + return s_table_read_position_vertex_size[type][format][elements]; } TPipelineFunction VertexLoader_Position::GetFunction(VertexComponentFormat type, ComponentFormat format, CoordComponentCount elements) { - return s_table_read_position[u32(type)][u32(format)][u32(elements)]; + return s_table_read_position[type][format][elements]; } diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index aa01ab0bf3..f5741f6423 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -67,140 +67,110 @@ void TexCoord_ReadIndex(VertexLoader* loader) ++loader->m_tcIndex; } -constexpr TPipelineFunction s_table_read_tex_coord[4][8][2] = { - { - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - nullptr, - }, - { - nullptr, - 
nullptr, - }, - }, - { - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - { - TexCoord_ReadDirect, - TexCoord_ReadDirect, - }, - }, - { - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - }, - { - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - { - TexCoord_ReadIndex, - TexCoord_ReadIndex, - }, - }, +using Common::EnumMap; +// These functions are to work around a "too many initializer values" error with nested brackets +// C++ does not let you write std::array, 2> a = {{1, 2}, {3, 4}} +// (although it does allow std::array, 2> b = {1, 2, 3, 4}) +constexpr EnumMap e(TPipelineFunction s, + TPipelineFunction st) +{ + return {s, st}; +} +constexpr EnumMap e(u32 s, u32 st) +{ + return {s, st}; +} + +constexpr EnumMap, ComponentFormat::Float> +f(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +constexpr EnumMap, ComponentFormat::Float> +g(EnumMap, ComponentFormat::Float> in) +{ + return in; +} + +template +using Table = EnumMap, ComponentFormat::Float>, + VertexComponentFormat::Index16>; + +constexpr Table s_table_read_tex_coord = { + f({ + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + e(nullptr, nullptr), + }), + f({ + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + e(TexCoord_ReadDirect, TexCoord_ReadDirect), + }), + f({ + 
e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + }), + f({ + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + e(TexCoord_ReadIndex, TexCoord_ReadIndex), + }), }; -constexpr u32 s_table_read_tex_coord_vertex_size[4][8][2] = { - { - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - {0, 0}, - }, - { - {1, 2}, - {1, 2}, - {2, 4}, - {2, 4}, - {4, 8}, - }, - { - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - {1, 1}, - }, - { - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}, - }, +constexpr Table s_table_read_tex_coord_vertex_size = { + g({ + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + e(0u, 0u), + }), + g({ + e(1, 2), + e(1, 2), + e(2, 4), + e(2, 4), + e(4, 8), + }), + g({ + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + e(1, 1), + }), + g({ + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + e(2, 2), + }), }; } // Anonymous namespace u32 VertexLoader_TextCoord::GetSize(VertexComponentFormat type, ComponentFormat format, TexComponentCount elements) { - return s_table_read_tex_coord_vertex_size[u32(type)][u32(format)][u32(elements)]; + return s_table_read_tex_coord_vertex_size[type][format][elements]; } TPipelineFunction VertexLoader_TextCoord::GetFunction(VertexComponentFormat type, ComponentFormat format, TexComponentCount elements) { - return s_table_read_tex_coord[u32(type)][u32(format)][u32(elements)]; + return s_table_read_tex_coord[type][format][elements]; } TPipelineFunction VertexLoader_TextCoord::GetDummyFunction() From 380b333387767e6c6fa95481a41767014fd513e8 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 27 Apr 2021 15:29:39 -0700 Subject: [PATCH 03/23] PixelShaderGen: Convert to EnumMap --- Source/Core/VideoCommon/PixelShaderGen.cpp | 211 ++++++++++----------- 1 file 
changed, 104 insertions(+), 107 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6261f57812..6c12a9607a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -8,6 +8,7 @@ #include "Common/Assert.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Logging/Log.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/BoundingBox.h" @@ -40,7 +41,7 @@ enum : u32 C_PENVCONST_END = C_EFBSCALE + 1 }; -constexpr std::array tev_ksel_table_c{ +constexpr Common::EnumMap tev_ksel_table_c{ "255,255,255", // 1 = 0x00 "223,223,223", // 7_8 = 0x01 "191,191,191", // 3_4 = 0x02 @@ -75,7 +76,7 @@ constexpr std::array tev_ksel_table_c{ I_KCOLORS "[3].aaa", // K3_A = 0x1F }; -constexpr std::array tev_ksel_table_a{ +constexpr Common::EnumMap tev_ksel_table_a{ "255", // 1 = 0x00 "223", // 7_8 = 0x01 "191", // 3_4 = 0x02 @@ -110,7 +111,7 @@ constexpr std::array tev_ksel_table_a{ I_KCOLORS "[3].a", // K3_A = 0x1F }; -constexpr std::array tev_c_input_table{ +constexpr Common::EnumMap tev_c_input_table{ "prev.rgb", // CPREV, "prev.aaa", // APREV, "c0.rgb", // C0, @@ -129,7 +130,7 @@ constexpr std::array tev_c_input_table{ "int3(0,0,0)", // ZERO }; -constexpr std::array tev_a_input_table{ +constexpr Common::EnumMap tev_a_input_table{ "prev.a", // APREV, "c0.a", // A0, "c1.a", // A1, @@ -140,7 +141,7 @@ constexpr std::array tev_a_input_table{ "0", // ZERO }; -constexpr std::array tev_ras_table{ +constexpr Common::EnumMap tev_ras_table{ "iround(col0 * 255.0)", "iround(col1 * 255.0)", "ERROR13", // 2 @@ -151,14 +152,14 @@ constexpr std::array tev_ras_table{ "int4(0, 0, 0, 0)", // zero }; -constexpr std::array tev_c_output_table{ +constexpr Common::EnumMap tev_c_output_table{ "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb", }; -constexpr std::array tev_a_output_table{ +constexpr Common::EnumMap tev_a_output_table{ "prev.a", "c0.a", "c1.a", @@ -1160,11 
+1161,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac; if (last_cc.dest != TevOutput::Prev) { - out.Write("\tprev.rgb = {};\n", tev_c_output_table[u32(last_cc.dest.Value())]); + out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]); } if (last_ac.dest != TevOutput::Prev) { - out.Write("\tprev.a = {};\n", tev_a_output_table[u32(last_ac.dest.Value())]); + out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]); } } out.Write("\tprev = prev & 255;\n"); @@ -1277,6 +1278,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, APIType api_type, bool stereo) { + using Common::EnumMap; + const auto& stage = uid_data->stagehash[n]; out.Write("\n\t// TEV stage {}\n", n); @@ -1303,7 +1306,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i // using iindtex{} as the offset coords if (has_ind_stage && tevind.bs != IndTexBumpAlpha::Off) { - static constexpr std::array tev_ind_alpha_sel{ + static constexpr EnumMap tev_ind_alpha_sel{ "", "x", "y", @@ -1316,16 +1319,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i // https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L3038-L3041 // https://github.com/devkitPro/libogc/blob/bd24a9b3f59502f9b30d6bac0ae35fc485045f78/gc/ogc/gx.h#L790-L800 - static constexpr std::array tev_ind_alpha_shift{ + static constexpr EnumMap tev_ind_alpha_shift{ '0', // ITF_8: 0bXXXXXYYY -> 0bXXXXX000? No shift? 
'5', // ITF_5: 0bIIIIIAAA -> 0bAAA00000, shift of 5 '4', // ITF_4: 0bIIIIAAAA -> 0bAAAA0000, shift of 4 '3', // ITF_3: 0bIIIAAAAA -> 0bAAAAA000, shift of 3 }; - out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt.Value(), - tev_ind_alpha_sel[u32(tevind.bs.Value())], - tev_ind_alpha_shift[u32(tevind.fmt.Value())]); + out.Write("\talphabump = (iindtex{}.{} << {}) & 248;\n", tevind.bt, + tev_ind_alpha_sel[tevind.bs], tev_ind_alpha_shift[tevind.fmt]); } else { @@ -1335,23 +1337,23 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (has_ind_stage && tevind.matrix_index != IndMtxIndex::Off) { // format - static constexpr std::array tev_ind_fmt_shift{ + static constexpr EnumMap tev_ind_fmt_shift{ '0', // ITF_8: 0bXXXXXXXX -> 0bXXXXXXXX, no shift '3', // ITF_5: 0bIIIIIAAA -> 0b000IIIII, shift of 3 '4', // ITF_4: 0bIIIIAAAA -> 0b0000IIII, shift of 4 '5', // ITF_3: 0bIIIAAAAA -> 0b00000III, shift of 5 }; - out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt.Value(), - tev_ind_fmt_shift[u32(tevind.fmt.Value())]); + out.Write("\tint3 iindtevcrd{} = iindtex{} >> {};\n", n, tevind.bt, + tev_ind_fmt_shift[tevind.fmt]); // bias - TODO: Check if this needs to be this complicated... 
// indexed by bias - static constexpr std::array tev_ind_bias_field{ + static constexpr EnumMap tev_ind_bias_field{ "", "x", "y", "xy", "z", "xz", "yz", "xyz", }; // indexed by fmt - static constexpr std::array tev_ind_bias_add{ + static constexpr EnumMap tev_ind_bias_add{ "-128", "1", "1", @@ -1361,22 +1363,19 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (tevind.bias == IndTexBias::S || tevind.bias == IndTexBias::T || tevind.bias == IndTexBias::U) { - out.Write("\tiindtevcrd{}.{} += int({});\n", n, - tev_ind_bias_field[u32(tevind.bias.Value())], - tev_ind_bias_add[u32(tevind.fmt.Value())]); + out.Write("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias], + tev_ind_bias_add[tevind.fmt]); } else if (tevind.bias == IndTexBias::ST || tevind.bias == IndTexBias::SU || tevind.bias == IndTexBias::TU_) { - out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n, - tev_ind_bias_field[u32(tevind.bias.Value())], - tev_ind_bias_add[u32(tevind.fmt.Value())]); + out.Write("\tiindtevcrd{0}.{1} += int2({2}, {2});\n", n, tev_ind_bias_field[tevind.bias], + tev_ind_bias_add[tevind.fmt]); } else if (tevind.bias == IndTexBias::STU) { out.Write("\tiindtevcrd{0}.{1} += int3({2}, {2}, {2});\n", n, - tev_ind_bias_field[u32(tevind.bias.Value())], - tev_ind_bias_add[u32(tevind.fmt.Value())]); + tev_ind_bias_field[tevind.bias], tev_ind_bias_add[tevind.fmt]); } // Multiplied by 2 because each matrix has two rows. 
@@ -1535,7 +1534,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i '\0', }; - out.Write("\trastemp = {}.{};\n", tev_ras_table[u32(stage.tevorders_colorchan)], rasswap); + out.Write("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap); } if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0) @@ -1567,8 +1566,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i cc.d == TevColorArg::Konst || ac.a == TevAlphaArg::Konst || ac.b == TevAlphaArg::Konst || ac.c == TevAlphaArg::Konst || ac.d == TevAlphaArg::Konst) { - out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[u32(stage.tevksel_kc)], - tev_ksel_table_a[u32(stage.tevksel_ka)]); + out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc], + tev_ksel_table_a[stage.tevksel_ka]); if (u32(stage.tevksel_kc) > 7) { @@ -1599,51 +1598,50 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VECTOR_BITWISE_AND)) { - out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.a.Value())], - tev_a_input_table[u32(ac.a.Value())]); - out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.b.Value())], - tev_a_input_table[u32(ac.b.Value())]); - out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[u32(cc.c.Value())], - tev_a_input_table[u32(ac.c.Value())]); + out.Write("\ttevin_a = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.a], + tev_a_input_table[ac.a]); + out.Write("\ttevin_b = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.b], + tev_a_input_table[ac.b]); + out.Write("\ttevin_c = int4({} & 255, {} & 255);\n", tev_c_input_table[cc.c], + tev_a_input_table[ac.c]); } else { - out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", - tev_c_input_table[u32(cc.a.Value())], tev_a_input_table[u32(ac.a.Value())]); - out.Write("\ttevin_b = int4({}, 
{})&int4(255, 255, 255, 255);\n", - tev_c_input_table[u32(cc.b.Value())], tev_a_input_table[u32(ac.b.Value())]); - out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", - tev_c_input_table[u32(cc.c.Value())], tev_a_input_table[u32(ac.c.Value())]); + out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a], + tev_a_input_table[ac.a]); + out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b], + tev_a_input_table[ac.b]); + out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c], + tev_a_input_table[ac.c]); } - out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[u32(cc.d.Value())], - tev_a_input_table[u32(ac.d.Value())]); + out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]); out.Write("\t// color combine\n"); - out.Write("\t{} = clamp(", tev_c_output_table[u32(cc.dest.Value())]); + out.Write("\t{} = clamp(", tev_c_output_table[cc.dest]); if (cc.bias != TevBias::Compare) { WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.scale, false); } else { - static constexpr std::array function_table{ - "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8, GT - "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // R8, TevComparison::EQ - "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : " - "int3(0,0,0))", // GR16, GT - "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : " - "int3(0,0,0))", // GR16, EQ - "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : " - "int3(0,0,0))", // BGR24, GT - "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? 
tevin_c.rgb : " - "int3(0,0,0))", // BGR24, EQ - "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8, GT - "((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8, EQ + static constexpr EnumMap tev_rgb_comparison_gt{ + "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8 + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16 + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24 + "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // RGB8 }; - const u32 mode = (u32(cc.compare_mode.Value()) << 1) | u32(cc.comparison.Value()); - out.Write(" tevin_d.rgb + "); - out.Write("{}", function_table[mode]); + static constexpr EnumMap tev_rgb_comparison_eq{ + "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TevCompareMode::R8 + "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // GR16 + "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // BGR24 + "((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // RGB8 + }; + + if (cc.comparison == TevComparison::EQ) + out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_eq[cc.compare_mode]); + else + out.Write(" tevin_d.rgb + {}", tev_rgb_comparison_gt[cc.compare_mode]); } if (cc.clamp) out.Write(", int3(0,0,0), int3(255,255,255))"); @@ -1652,27 +1650,31 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i out.Write(";\n"); out.Write("\t// alpha combine\n"); - out.Write("\t{} = clamp(", tev_a_output_table[u32(ac.dest.Value())]); + out.Write("\t{} = clamp(", tev_a_output_table[ac.dest]); if (ac.bias != TevBias::Compare) { WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.scale, true); } else { - static constexpr std::array function_table{ - "((tevin_a.r > tevin_b.r) ? 
tevin_c.a : 0)", // TevCompareMode::R8, GT - "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // R8, TevComparison::EQ - "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, GT - "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, EQ - "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, GT - "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, EQ - "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8, GT - "((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // A8, EQ + static constexpr EnumMap tev_a_comparison_gt{ + "((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8 + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16 + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24 + "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // A8 }; - const u32 mode = (u32(ac.compare_mode.Value()) << 1) | u32(ac.comparison.Value()); - out.Write(" tevin_d.a + "); - out.Write("{}", function_table[mode]); + static constexpr EnumMap tev_a_comparison_eq{ + "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TevCompareMode::R8 + "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // GR16, + "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // BGR24, + "((tevin_a.a == tevin_b.a) ? 
tevin_c.a : 0)", // A8 + }; + + if (ac.comparison == TevComparison::EQ) + out.Write(" tevin_d.a + {}", tev_a_comparison_eq[ac.compare_mode]); + else + out.Write(" tevin_d.a + {}", tev_a_comparison_gt[ac.compare_mode]); } if (ac.clamp) out.Write(", 0, 255)"); @@ -1685,36 +1687,33 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op, bool clamp, TevScale scale, bool alpha) { - static constexpr std::array tev_scale_table_left{ + static constexpr Common::EnumMap tev_scale_table_left{ "", // Scale1 " << 1", // Scale2 " << 2", // Scale4 "", // Divide2 }; - static constexpr std::array tev_scale_table_right{ + static constexpr Common::EnumMap tev_scale_table_right{ "", // Scale1 "", // Scale2 "", // Scale4 " >> 1", // Divide2 }; - // indexed by 2*op+(scale==Divide2) - static constexpr std::array tev_lerp_bias{ - "", + static constexpr Common::EnumMap tev_lerp_bias{ " + 128", - "", " + 127", }; - static constexpr std::array tev_bias_table{ + static constexpr Common::EnumMap tev_bias_table{ "", // Zero, " + 128", // AddHalf, " - 128", // SubHalf, "", }; - static constexpr std::array tev_op_table{ + static constexpr Common::EnumMap tev_op_table{ '+', // TevOp::Add = 0, '-', // TevOp::Sub = 1, }; @@ -1724,17 +1723,16 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255 // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy // - a rounding bias is added before dividing by 256 - out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[u32(bias)], - tev_scale_table_left[u32(scale)]); - out.Write(" {} ", tev_op_table[u32(op)]); - out.Write("(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)", - components, components, components, components, components, 
- tev_scale_table_left[u32(scale)], - tev_lerp_bias[2 * u32(op) + ((scale == TevScale::Divide2) == alpha)]); - out.Write("){}", tev_scale_table_right[u32(scale)]); + out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[bias], tev_scale_table_left[scale]); + out.Write(" {} ", tev_op_table[op]); + out.Write("(((((tevin_a.{0}<<8) + " + "(tevin_b.{0}-tevin_a.{0})*(tevin_c.{0}+(tevin_c.{0}>>7))){1}){2})>>8)", + components, tev_scale_table_left[scale], + ((scale == TevScale::Divide2) == alpha) ? tev_lerp_bias[op] : ""); + out.Write("){}", tev_scale_table_right[scale]); } -constexpr std::array tev_alpha_funcs_table{ +constexpr Common::EnumMap tev_alpha_funcs_table{ "(false)", // CompareMode::Never "(prev.a < {})", // CompareMode::Less "(prev.a == {})", // CompareMode::Equal @@ -1745,7 +1743,7 @@ constexpr std::array tev_alpha_funcs_table{ "(true)" // CompareMode::Always }; -constexpr std::array tev_alpha_funclogic_table{ +constexpr Common::EnumMap tev_alpha_funclogic_table{ " && ", // and " || ", // or " != ", // xor @@ -1763,9 +1761,9 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat const auto write_alpha_func = [&out](CompareMode mode, std::string_view ref) { const bool has_no_arguments = mode == CompareMode::Never || mode == CompareMode::Always; if (has_no_arguments) - out.Write("{}", tev_alpha_funcs_table[u32(mode)]); + out.Write("{}", tev_alpha_funcs_table[mode]); else - out.Write(tev_alpha_funcs_table[u32(mode)], ref); + out.Write(tev_alpha_funcs_table[mode], ref); }; out.SetConstantsUsed(C_ALPHA, C_ALPHA); @@ -1779,7 +1777,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat write_alpha_func(uid_data->alpha_test_comp0, alpha_ref[0]); // Lookup the logic op - out.Write("{}", tev_alpha_funclogic_table[u32(uid_data->alpha_test_logic)]); + out.Write("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]); // Lookup the second component from the alpha function table 
write_alpha_func(uid_data->alpha_test_comp1, alpha_ref[1]); @@ -1809,7 +1807,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat out.Write("\t}}\n"); } -constexpr std::array tev_fog_funcs_table{ +constexpr Common::EnumMap tev_fog_funcs_table{ "", // No Fog "", // ? "", // Linear @@ -1866,7 +1864,7 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) if (uid_data->fog_fsel >= FogType::Exp) { - out.Write("{}", tev_fog_funcs_table[u32(uid_data->fog_fsel)]); + out.Write("{}", tev_fog_funcs_table[uid_data->fog_fsel]); } else { @@ -1919,7 +1917,8 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) { if (uid_data->blend_enable) { - static constexpr std::array blend_src_factor{ + using Common::EnumMap; + static constexpr EnumMap blend_src_factor{ "float3(0,0,0);", // ZERO "float3(1,1,1);", // ONE "initial_ocol0.rgb;", // DSTCLR @@ -1929,7 +1928,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "initial_ocol0.aaa;", // DSTALPHA "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA }; - static constexpr std::array blend_src_factor_alpha{ + static constexpr EnumMap blend_src_factor_alpha{ "0.0;", // ZERO "1.0;", // ONE "initial_ocol0.a;", // DSTCLR @@ -1939,7 +1938,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "initial_ocol0.a;", // DSTALPHA "1.0 - initial_ocol0.a;", // INVDSTALPHA }; - static constexpr std::array blend_dst_factor{ + static constexpr EnumMap blend_dst_factor{ "float3(0,0,0);", // ZERO "float3(1,1,1);", // ONE "ocol0.rgb;", // SRCCLR @@ -1949,7 +1948,7 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "initial_ocol0.aaa;", // DSTALPHA "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA }; - static constexpr std::array blend_dst_factor_alpha{ + static constexpr EnumMap blend_dst_factor_alpha{ "0.0;", // ZERO "1.0;", // ONE "ocol0.a;", // SRCCLR @@ -1960,13 +1959,11 @@ 
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "1.0 - initial_ocol0.a;", // INVDSTALPHA }; out.Write("\tfloat4 blend_src;\n"); - out.Write("\tblend_src.rgb = {}\n", blend_src_factor[u32(uid_data->blend_src_factor)]); - out.Write("\tblend_src.a = {}\n", - blend_src_factor_alpha[u32(uid_data->blend_src_factor_alpha)]); + out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]); + out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]); out.Write("\tfloat4 blend_dst;\n"); - out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[u32(uid_data->blend_dst_factor)]); - out.Write("\tblend_dst.a = {}\n", - blend_dst_factor_alpha[u32(uid_data->blend_dst_factor_alpha)]); + out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]); + out.Write("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]); out.Write("\tfloat4 blend_result;\n"); if (uid_data->blend_subtract) From f53dc6564fe16c23234b28907003d0d4cb1c4c5c Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 27 Apr 2021 22:01:38 -0700 Subject: [PATCH 04/23] UberShaderPixel: Convert to EnumMap --- Source/Core/VideoCommon/UberShaderPixel.cpp | 461 ++++++-------------- 1 file changed, 137 insertions(+), 324 deletions(-) diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 36fc6addc1..c7719ec377 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -404,263 +404,95 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, "int4 getKonstColor(State s, StageState ss);\n" "\n"); - // The switch statements in these functions appear to get transformed into an if..else chain - // on NVIDIA's OpenGL/Vulkan drivers, resulting in lower performance than the D3D counterparts. 
- // Transforming the switch into a binary tree of ifs can increase performance by up to 20%. - if (api_type == APIType::D3D) - { - out.Write("// Helper function for Alpha Test\n" - "bool alphaCompare(int a, int b, uint compare) {{\n" - " switch (compare) {{\n" - " case 0u: // NEVER\n" - " return false;\n" - " case 1u: // LESS\n" - " return a < b;\n" - " case 2u: // EQUAL\n" - " return a == b;\n" - " case 3u: // LEQUAL\n" - " return a <= b;\n" - " case 4u: // GREATER\n" - " return a > b;\n" - " case 5u: // NEQUAL;\n" - " return a != b;\n" - " case 6u: // GEQUAL\n" - " return a >= b;\n" - " case 7u: // ALWAYS\n" - " return true;\n" - " }}\n" - "}}\n" - "\n" - "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev.rgb\n" - " return s.Reg[0].rgb;\n" - " case 1u: // prev.aaa\n" - " return s.Reg[0].aaa;\n" - " case 2u: // c0.rgb\n" - " return s.Reg[1].rgb;\n" - " case 3u: // c0.aaa\n" - " return s.Reg[1].aaa;\n" - " case 4u: // c1.rgb\n" - " return s.Reg[2].rgb;\n" - " case 5u: // c1.aaa\n" - " return s.Reg[2].aaa;\n" - " case 6u: // c2.rgb\n" - " return s.Reg[3].rgb;\n" - " case 7u: // c2.aaa\n" - " return s.Reg[3].aaa;\n" - " case 8u:\n" - " return s.TexColor.rgb;\n" - " case 9u:\n" - " return s.TexColor.aaa;\n" - " case 10u:\n" - " return getRasColor(s, ss, colors_0, colors_1).rgb;\n" - " case 11u:\n" - " return getRasColor(s, ss, colors_0, colors_1).aaa;\n" - " case 12u: // One\n" - " return int3(255, 255, 255);\n" - " case 13u: // Half\n" - " return int3(128, 128, 128);\n" - " case 14u:\n" - " return getKonstColor(s, ss).rgb;\n" - " case 15u: // Zero\n" - " return int3(0, 0, 0);\n" - " }}\n" - "}}\n" - "\n" - "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev.a\n" - " return s.Reg[0].a;\n" - " case 1u: // c0.a\n" - " return s.Reg[1].a;\n" - " case 2u: // c1.a\n" - " return 
s.Reg[2].a;\n" - " case 3u: // c2.a\n" - " return s.Reg[3].a;\n" - " case 4u:\n" - " return s.TexColor.a;\n" - " case 5u:\n" - " return getRasColor(s, ss, colors_0, colors_1).a;\n" - " case 6u:\n" - " return getKonstColor(s, ss).a;\n" - " case 7u: // Zero\n" - " return 0;\n" - " }}\n" - "}}\n" - "\n" - "int4 getTevReg(in State s, uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " return s.Reg[0];\n" - " case 1u: // c0\n" - " return s.Reg[1];\n" - " case 2u: // c1\n" - " return s.Reg[2];\n" - " case 3u: // c2\n" - " return s.Reg[3];\n" - " default: // prev\n" - " return s.Reg[0];\n" - " }}\n" - "}}\n" - "\n" - "void setRegColor(inout State s, uint index, int3 color) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " s.Reg[0].rgb = color;\n" - " break;\n" - " case 1u: // c0\n" - " s.Reg[1].rgb = color;\n" - " break;\n" - " case 2u: // c1\n" - " s.Reg[2].rgb = color;\n" - " break;\n" - " case 3u: // c2\n" - " s.Reg[3].rgb = color;\n" - " break;\n" - " }}\n" - "}}\n" - "\n" - "void setRegAlpha(inout State s, uint index, int alpha) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " s.Reg[0].a = alpha;\n" - " break;\n" - " case 1u: // c0\n" - " s.Reg[1].a = alpha;\n" - " break;\n" - " case 2u: // c1\n" - " s.Reg[2].a = alpha;\n" - " break;\n" - " case 3u: // c2\n" - " s.Reg[3].a = alpha;\n" - " break;\n" - " }}\n" - "}}\n" - "\n"); - } - else - { - out.Write( - "// Helper function for Alpha Test\n" - "bool alphaCompare(int a, int b, uint compare) {{\n" - " if (compare < 4u) {{\n" - " if (compare < 2u) {{\n" - " return (compare == 0u) ? (false) : (a < b);\n" - " }} else {{\n" - " return (compare == 2u) ? (a == b) : (a <= b);\n" - " }}\n" - " }} else {{\n" - " if (compare < 6u) {{\n" - " return (compare == 4u) ? (a > b) : (a != b);\n" - " }} else {{\n" - " return (compare == 6u) ? 
(a >= b) : (true);\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " if (index < 8u) {{\n" - " if (index < 4u) {{\n" - " if (index < 2u) {{\n" - " return (index == 0u) ? s.Reg[0].rgb : s.Reg[0].aaa;\n" - " }} else {{\n" - " return (index == 2u) ? s.Reg[1].rgb : s.Reg[1].aaa;\n" - " }}\n" - " }} else {{\n" - " if (index < 6u) {{\n" - " return (index == 4u) ? s.Reg[2].rgb : s.Reg[2].aaa;\n" - " }} else {{\n" - " return (index == 6u) ? s.Reg[3].rgb : s.Reg[3].aaa;\n" - " }}\n" - " }}\n" - " }} else {{\n" - " if (index < 12u) {{\n" - " if (index < 10u) {{\n" - " return (index == 8u) ? s.TexColor.rgb : s.TexColor.aaa;\n" - " }} else {{\n" - " int4 ras = getRasColor(s, ss, colors_0, colors_1);\n" - " return (index == 10u) ? ras.rgb : ras.aaa;\n" - " }}\n" - " }} else {{\n" - " if (index < 14u) {{\n" - " return (index == 12u) ? int3(255, 255, 255) : int3(128, 128, 128);\n" - " }} else {{\n" - " return (index == 14u) ? getKonstColor(s, ss).rgb : int3(0, 0, 0);\n" - " }}\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " if (index < 4u) {{\n" - " if (index < 2u) {{\n" - " return (index == 0u) ? s.Reg[0].a : s.Reg[1].a;\n" - " }} else {{\n" - " return (index == 2u) ? s.Reg[2].a : s.Reg[3].a;\n" - " }}\n" - " }} else {{\n" - " if (index < 6u) {{\n" - " return (index == 4u) ? s.TexColor.a : getRasColor(s, ss, colors_0, colors_1).a;\n" - " }} else {{\n" - " return (index == 6u) ? 
getKonstColor(s, ss).a : 0;\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "int4 getTevReg(in State s, uint index) {{\n" - " if (index < 2u) {{\n" - " if (index == 0u) {{\n" - " return s.Reg[0];\n" - " }} else {{\n" - " return s.Reg[1];\n" - " }}\n" - " }} else {{\n" - " if (index == 2u) {{\n" - " return s.Reg[2];\n" - " }} else {{\n" - " return s.Reg[3];\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "void setRegColor(inout State s, uint index, int3 color) {{\n" - " if (index < 2u) {{\n" - " if (index == 0u) {{\n" - " s.Reg[0].rgb = color;\n" - " }} else {{\n" - " s.Reg[1].rgb = color;\n" - " }}\n" - " }} else {{\n" - " if (index == 2u) {{\n" - " s.Reg[2].rgb = color;\n" - " }} else {{\n" - " s.Reg[3].rgb = color;\n" - " }}\n" - " }}\n" - "}}\n" - "\n" - "void setRegAlpha(inout State s, uint index, int alpha) {{\n" - " if (index < 2u) {{\n" - " if (index == 0u) {{\n" - " s.Reg[0].a = alpha;\n" - " }} else {{\n" - " s.Reg[1].a = alpha;\n" - " }}\n" - " }} else {{\n" - " if (index == 2u) {{\n" - " s.Reg[2].a = alpha;\n" - " }} else {{\n" - " s.Reg[3].a = alpha;\n" - " }}\n" - " }}\n" - "}}\n" - "\n"); - } + static constexpr Common::EnumMap tev_alpha_funcs_table{ + "return false;", // CompareMode::Never + "return a < b;", // CompareMode::Less + "return a == b;", // CompareMode::Equal + "return a <= b;", // CompareMode::LEqual + "return a > b;", // CompareMode::Greater + "return a != b;", // CompareMode::NEqual + "return a >= b;", // CompareMode::GEqual + "return true;" // CompareMode::Always + }; + + static constexpr Common::EnumMap tev_c_input_table{ + "return s.Reg[0].rgb;", // CPREV, + "return s.Reg[0].aaa;", // APREV, + "return s.Reg[1].rgb;", // C0, + "return s.Reg[1].aaa;", // A0, + "return s.Reg[2].rgb;", // C1, + "return s.Reg[2].aaa;", // A1, + "return s.Reg[3].rgb;", // C2, + "return s.Reg[3].aaa;", // A2, + "return s.TexColor.rgb;", // TEXC, + "return s.TexColor.aaa;", // TEXA, + "return getRasColor(s, ss, colors_0, colors_1).rgb;", // RASC, + "return getRasColor(s, 
ss, colors_0, colors_1).aaa;", // RASA, + "return int3(255, 255, 255);", // ONE + "return int3(128, 128, 128);", // HALF + "return getKonstColor(s, ss).rgb;", // KONST + "return int3(0, 0, 0);", // ZERO + }; + + static constexpr Common::EnumMap tev_a_input_table{ + "return s.Reg[0].a;", // APREV, + "return s.Reg[1].a;", // A0, + "return s.Reg[2].a;", // A1, + "return s.Reg[3].a;", // A2, + "return s.TexColor.a;", // TEXA, + "return getRasColor(s, ss, colors_0, colors_1).a;", // RASA, + "return getKonstColor(s, ss).a;", // KONST, (hw1 had quarter) + "return 0;", // ZERO + }; + + static constexpr Common::EnumMap tev_regs_lookup_table{ + "return s.Reg[0];", + "return s.Reg[1];", + "return s.Reg[2];", + "return s.Reg[3];", + }; + + static constexpr Common::EnumMap tev_c_set_table{ + "s.Reg[0].rgb = color;", + "s.Reg[1].rgb = color;", + "s.Reg[2].rgb = color;", + "s.Reg[3].rgb = color;", + }; + + static constexpr Common::EnumMap tev_a_set_table{ + "s.Reg[0].a = alpha;", + "s.Reg[1].a = alpha;", + "s.Reg[2].a = alpha;", + "s.Reg[3].a = alpha;", + }; + + out.Write("// Helper function for Alpha Test\n" + "bool alphaCompare(int a, int b, uint compare) {{\n"); + WriteSwitch(out, api_type, "compare", tev_alpha_funcs_table, 2, false); + out.Write("}}\n" + "\n" + "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " + "uint index) {{\n"); + WriteSwitch(out, api_type, "index", tev_c_input_table, 2, false); + out.Write("}}\n" + "\n" + "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " + "uint index) {{\n"); + WriteSwitch(out, api_type, "index", tev_a_input_table, 2, false); + out.Write("}}\n" + "\n" + "int4 getTevReg(in State s, uint index) {{\n"); + WriteSwitch(out, api_type, "index", tev_regs_lookup_table, 2, false); + out.Write("}}\n" + "\n" + "void setRegColor(inout State s, uint index, int3 color) {{\n"); + WriteSwitch(out, api_type, "index", tev_c_set_table, 2, true); + out.Write("}}\n" + "\n" + "void 
setRegAlpha(inout State s, uint index, int alpha) {{\n"); + WriteSwitch(out, api_type, "index", tev_a_set_table, 2, true); + out.Write("}}\n" + "\n"); // Since the fixed-point texture coodinate variables aren't global, we need to pass // them to the select function. This applies to all backends. @@ -1284,78 +1116,59 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, if (use_shader_blend) { - static constexpr std::array blendSrcFactor{{ - "float3(0,0,0);", // ZERO - "float3(1,1,1);", // ONE - "initial_ocol0.rgb;", // DSTCLR - "float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR - "ocol1.aaa;", // SRCALPHA - "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA - "initial_ocol0.aaa;", // DSTALPHA - "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA - }}; - static constexpr std::array blendSrcFactorAlpha{{ - "0.0;", // ZERO - "1.0;", // ONE - "initial_ocol0.a;", // DSTCLR - "1.0 - initial_ocol0.a;", // INVDSTCLR - "ocol1.a;", // SRCALPHA - "1.0 - ocol1.a;", // INVSRCALPHA - "initial_ocol0.a;", // DSTALPHA - "1.0 - initial_ocol0.a;", // INVDSTALPHA - }}; - static constexpr std::array blendDstFactor{{ - "float3(0,0,0);", // ZERO - "float3(1,1,1);", // ONE - "ocol0.rgb;", // SRCCLR - "float3(1,1,1) - ocol0.rgb;", // INVSRCCLR - "ocol1.aaa;", // SRCALHA - "float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA - "initial_ocol0.aaa;", // DSTALPHA - "float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA - }}; - static constexpr std::array blendDstFactorAlpha{{ - "0.0;", // ZERO - "1.0;", // ONE - "ocol0.a;", // SRCCLR - "1.0 - ocol0.a;", // INVSRCCLR - "ocol1.a;", // SRCALPHA - "1.0 - ocol1.a;", // INVSRCALPHA - "initial_ocol0.a;", // DSTALPHA - "1.0 - initial_ocol0.a;", // INVDSTALPHA - }}; + using Common::EnumMap; + + static constexpr EnumMap blendSrcFactor{ + "blend_src.rgb = float3(0,0,0);", // ZERO + "blend_src.rgb = float3(1,1,1);", // ONE + "blend_src.rgb = initial_ocol0.rgb;", // DSTCLR + "blend_src.rgb = float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR 
+ "blend_src.rgb = ocol1.aaa;", // SRCALPHA + "blend_src.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "blend_src.rgb = initial_ocol0.aaa;", // DSTALPHA + "blend_src.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static constexpr EnumMap blendSrcFactorAlpha{ + "blend_src.a = 0.0;", // ZERO + "blend_src.a = 1.0;", // ONE + "blend_src.a = initial_ocol0.a;", // DSTCLR + "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTCLR + "blend_src.a = ocol1.a;", // SRCALPHA + "blend_src.a = 1.0 - ocol1.a;", // INVSRCALPHA + "blend_src.a = initial_ocol0.a;", // DSTALPHA + "blend_src.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA + }; + static constexpr EnumMap blendDstFactor{ + "blend_dst.rgb = float3(0,0,0);", // ZERO + "blend_dst.rgb = float3(1,1,1);", // ONE + "blend_dst.rgb = ocol0.rgb;", // SRCCLR + "blend_dst.rgb = float3(1,1,1) - ocol0.rgb;", // INVSRCCLR + "blend_dst.rgb = ocol1.aaa;", // SRCALPHA + "blend_dst.rgb = float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA + "blend_dst.rgb = initial_ocol0.aaa;", // DSTALPHA + "blend_dst.rgb = float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA + }; + static constexpr EnumMap blendDstFactorAlpha{ + "blend_dst.a = 0.0;", // ZERO + "blend_dst.a = 1.0;", // ONE + "blend_dst.a = ocol0.a;", // SRCCLR + "blend_dst.a = 1.0 - ocol0.a;", // INVSRCCLR + "blend_dst.a = ocol1.a;", // SRCALPHA + "blend_dst.a = 1.0 - ocol1.a;", // INVSRCALPHA + "blend_dst.a = initial_ocol0.a;", // DSTALPHA + "blend_dst.a = 1.0 - initial_ocol0.a;", // INVDSTALPHA + }; out.Write(" if (blend_enable) {{\n" - " float4 blend_src;\n" - " switch (blend_src_factor) {{\n"); - for (size_t i = 0; i < blendSrcFactor.size(); i++) - { - out.Write(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]); - } + " float4 blend_src;\n"); + WriteSwitch(out, api_type, "blend_src_factor", blendSrcFactor, 4, true); + WriteSwitch(out, api_type, "blend_src_factor_alpha", blendSrcFactorAlpha, 4, true); - out.Write(" }}\n" - " switch (blend_src_factor_alpha) {{\n"); - 
for (size_t i = 0; i < blendSrcFactorAlpha.size(); i++) - { - out.Write(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]); - } - - out.Write(" }}\n" - " float4 blend_dst;\n" - " switch (blend_dst_factor) {{\n"); - for (size_t i = 0; i < blendDstFactor.size(); i++) - { - out.Write(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]); - } - out.Write(" }}\n" - " switch (blend_dst_factor_alpha) {{\n"); - for (size_t i = 0; i < blendDstFactorAlpha.size(); i++) - { - out.Write(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]); - } + out.Write(" float4 blend_dst;\n"); + WriteSwitch(out, api_type, "blend_dst_factor", blendDstFactor, 4, true); + WriteSwitch(out, api_type, "blend_dst_factor_alpha", blendDstFactorAlpha, 4, true); out.Write( - " }}\n" " float4 blend_result;\n" " if (blend_subtract)\n" " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n" From 205ab23d80e5c1975a1e022fa171ea55563ea1c7 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Fri, 30 Apr 2021 19:55:17 -0700 Subject: [PATCH 05/23] GeometryShaderGen: Convert to EnumMap --- Source/Core/VideoCommon/GeometryShaderGen.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index c8b4bdc261..4108a6efd6 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -6,25 +6,29 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/LightingShaderGen.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" -constexpr std::array primitives_ogl{ +constexpr Common::EnumMap primitives_ogl{ "points", "lines", "triangles", "triangles", }; -constexpr std::array primitives_d3d{ +constexpr Common::EnumMap primitives_d3d{ "point", "line", 
"triangle", "triangle", }; +constexpr Common::EnumMap vertex_in_map{1u, 2u, 3u, 3u}; +constexpr Common::EnumMap vertex_out_map{4u, 4u, 4u, 3u}; + bool geometry_shader_uid_data::IsPassthrough() const { const bool stereo = g_ActiveConfig.stereo_mode != StereoMode::Off; @@ -61,9 +65,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& const bool ssaa = host_config.ssaa; const bool stereo = host_config.stereo; const auto primitive_type = static_cast(uid_data->primitive_type); - const auto primitive_type_index = static_cast(uid_data->primitive_type); - const auto vertex_in = std::min(static_cast(primitive_type_index) + 1, 3u); - u32 vertex_out = primitive_type == PrimitiveType::TriangleStrip ? 3 : 4; + const u32 vertex_in = vertex_in_map[primitive_type]; + u32 vertex_out = vertex_out_map[primitive_type]; if (wireframe) vertex_out++; @@ -73,14 +76,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& // Insert layout parameters if (host_config.backend_gs_instancing) { - out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index], + out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type], stereo ? 2 : 1); out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", vertex_out); } else { - out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]); + out.Write("layout({}) in;\n", primitives_ogl[primitive_type]); out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", stereo ? vertex_out * 2 : vertex_out); } @@ -139,13 +142,13 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1); out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream output, in uint " "InstanceID : SV_GSInstanceID)\n{{\n", - primitives_d3d[primitive_type_index], vertex_in, wireframe ? 
"Line" : "Triangle"); + primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle"); } else { out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out); out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream output)\n{{\n", - primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle"); + primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle"); } out.Write("\tVertexData ps;\n"); From 3fc12431c5ed7a0df1e9bd4c538405a44f6561de Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Fri, 30 Apr 2021 14:48:27 -0700 Subject: [PATCH 06/23] Remove parameters to SWVertexLoader::SetFormat They haven't been used since efbe5bc4b65. --- Source/Core/VideoBackends/Software/SWVertexLoader.cpp | 4 ++-- Source/Core/VideoBackends/Software/SWVertexLoader.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index 5c0362b2c5..738162de80 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -74,7 +74,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ memset(static_cast(&m_vertex), 0, sizeof(m_vertex)); // parse the videocommon format to our own struct format (m_vertex) - SetFormat(g_main_cp_state.last_id, primitiveType); + SetFormat(); ParseVertex(VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration(), index); // transform this vertex so that it can be used for rasterization (outVertex) @@ -98,7 +98,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ DebugUtil::OnObjectEnd(); } -void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType) +void SWVertexLoader::SetFormat() { // matrix index from xf regs or cp memory? 
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx || diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index 4bc9f67ca8..bbda8da037 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -22,7 +22,7 @@ public: protected: void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; - void SetFormat(u8 attributeIndex, u8 primitiveType); + void SetFormat(); void ParseVertex(const PortableVertexDeclaration& vdec, int index); InputVertexData m_vertex{}; From 3aaeb2b9ef3f6b9cde69c77ecdf96e8defc996d5 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Fri, 30 Apr 2021 14:57:12 -0700 Subject: [PATCH 07/23] Convert OpcodeDecoder::Opcode and OpcodeDecoder::Primitive to enum class --- Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp | 27 +++++---- Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp | 60 ++++++++++--------- .../VideoBackends/Software/SWVertexLoader.cpp | 13 ++-- .../Core/VideoBackends/Software/SetupUnit.cpp | 21 +++---- .../Core/VideoBackends/Software/SetupUnit.h | 9 ++- Source/Core/VideoCommon/IndexGenerator.cpp | 30 +++++----- Source/Core/VideoCommon/IndexGenerator.h | 7 ++- Source/Core/VideoCommon/OpcodeDecoding.cpp | 33 +++++----- Source/Core/VideoCommon/OpcodeDecoding.h | 32 +++++++--- .../Core/VideoCommon/VertexLoaderManager.cpp | 6 +- Source/Core/VideoCommon/VertexLoaderManager.h | 8 ++- Source/Core/VideoCommon/VertexManagerBase.cpp | 51 ++++++++-------- Source/Core/VideoCommon/VertexManagerBase.h | 12 +++- 13 files changed, 177 insertions(+), 132 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp index 3ac65e9999..a2103bd8a8 100644 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp @@ -150,18 +150,19 @@ FifoAnalyzer::CPMemory s_CpMem; u32 
AnalyzeCommand(const u8* data, DecodeMode mode) { + using OpcodeDecoder::Opcode; const u8* dataStart = data; int cmd = ReadFifo8(data); - switch (cmd) + switch (static_cast(cmd)) { - case OpcodeDecoder::GX_NOP: - case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS: - case OpcodeDecoder::GX_CMD_INVL_VC: + case Opcode::GX_NOP: + case Opcode::GX_CMD_UNKNOWN_METRICS: + case Opcode::GX_CMD_INVL_VC: break; - case OpcodeDecoder::GX_LOAD_CP_REG: + case Opcode::GX_LOAD_CP_REG: { s_DrawingObject = false; @@ -171,7 +172,7 @@ u32 AnalyzeCommand(const u8* data, DecodeMode mode) break; } - case OpcodeDecoder::GX_LOAD_XF_REG: + case Opcode::GX_LOAD_XF_REG: { s_DrawingObject = false; @@ -182,14 +183,14 @@ u32 AnalyzeCommand(const u8* data, DecodeMode mode) break; } - case OpcodeDecoder::GX_LOAD_INDX_A: - case OpcodeDecoder::GX_LOAD_INDX_B: - case OpcodeDecoder::GX_LOAD_INDX_C: - case OpcodeDecoder::GX_LOAD_INDX_D: + case Opcode::GX_LOAD_INDX_A: + case Opcode::GX_LOAD_INDX_B: + case Opcode::GX_LOAD_INDX_C: + case Opcode::GX_LOAD_INDX_D: { s_DrawingObject = false; - int array = 0xc + (cmd - OpcodeDecoder::GX_LOAD_INDX_A) / 8; + int array = 0xc + (cmd - static_cast(Opcode::GX_LOAD_INDX_A)) / 8; u32 value = ReadFifo32(data); if (mode == DecodeMode::Record) @@ -197,7 +198,7 @@ u32 AnalyzeCommand(const u8* data, DecodeMode mode) break; } - case OpcodeDecoder::GX_CMD_CALL_DL: + case Opcode::GX_CMD_CALL_DL: // The recorder should have expanded display lists into the fifo stream and skipped the call to // start them // That is done to make it easier to track where memory is updated @@ -205,7 +206,7 @@ u32 AnalyzeCommand(const u8* data, DecodeMode mode) data += 8; break; - case OpcodeDecoder::GX_LOAD_BP_REG: + case Opcode::GX_LOAD_BP_REG: { s_DrawingObject = false; ReadFifo32(data); diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index 4fcc0b2fb8..21a4572d38 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ 
b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -173,17 +173,13 @@ static std::string GetPrimitiveName(u8 cmd) const u8 vat = cmd & OpcodeDecoder::GX_VAT_MASK; // Vertex loader index (0 - 7) const u8 primitive = (cmd & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT; - static constexpr std::array names = { - "GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)", - "GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP", - "GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES", - "GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS", - }; - return fmt::format("{} VAT {}", names[primitive], vat); + return fmt::format("{} VAT {}", static_cast(primitive), vat); } void FIFOAnalyzer::UpdateDetails() { + using OpcodeDecoder::Opcode; + // Clearing the detail list can update the selection, which causes UpdateDescription to be called // immediately. However, the object data offsets have not been recalculated yet, which can cause // the wrong data to be used, potentially leading to out of bounds data or other bad things. 
@@ -223,14 +219,14 @@ void FIFOAnalyzer::UpdateDetails() const u32 start_offset = object_offset; m_object_data_offsets.push_back(start_offset); - const u8 command = object[object_offset++]; - switch (command) + const Opcode opcode = static_cast(object[object_offset++]); + switch (opcode) { - case OpcodeDecoder::GX_NOP: - if (object[object_offset] == OpcodeDecoder::GX_NOP) + case Opcode::GX_NOP: + if (object[object_offset] == static_cast(Opcode::GX_NOP)) { u32 nop_count = 2; - while (object[++object_offset] == OpcodeDecoder::GX_NOP) + while (object[++object_offset] == static_cast(Opcode::GX_NOP)) nop_count++; new_label = QStringLiteral("NOP (%1x)").arg(nop_count); @@ -241,15 +237,15 @@ void FIFOAnalyzer::UpdateDetails() } break; - case OpcodeDecoder::GX_CMD_UNKNOWN_METRICS: + case Opcode::GX_CMD_UNKNOWN_METRICS: new_label = QStringLiteral("GX_CMD_UNKNOWN_METRICS"); break; - case OpcodeDecoder::GX_CMD_INVL_VC: + case Opcode::GX_CMD_INVL_VC: new_label = QStringLiteral("GX_CMD_INVL_VC"); break; - case OpcodeDecoder::GX_LOAD_CP_REG: + case Opcode::GX_LOAD_CP_REG: { const u8 cmd2 = object[object_offset++]; const u32 value = Common::swap32(&object[object_offset]); @@ -265,7 +261,7 @@ void FIFOAnalyzer::UpdateDetails() } break; - case OpcodeDecoder::GX_LOAD_XF_REG: + case Opcode::GX_LOAD_XF_REG: { const auto [name, desc] = GetXFTransferInfo(&object[object_offset]); const u32 cmd2 = Common::swap32(&object[object_offset]); @@ -288,7 +284,7 @@ void FIFOAnalyzer::UpdateDetails() } break; - case OpcodeDecoder::GX_LOAD_INDX_A: + case Opcode::GX_LOAD_INDX_A: { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(&object[object_offset])); @@ -296,7 +292,7 @@ void FIFOAnalyzer::UpdateDetails() new_label = QStringLiteral("LOAD INDX A %1").arg(QString::fromStdString(desc)); } break; - case OpcodeDecoder::GX_LOAD_INDX_B: + case Opcode::GX_LOAD_INDX_B: { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(&object[object_offset])); @@ 
-304,7 +300,7 @@ void FIFOAnalyzer::UpdateDetails() new_label = QStringLiteral("LOAD INDX B %1").arg(QString::fromStdString(desc)); } break; - case OpcodeDecoder::GX_LOAD_INDX_C: + case Opcode::GX_LOAD_INDX_C: { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(&object[object_offset])); @@ -312,7 +308,7 @@ void FIFOAnalyzer::UpdateDetails() new_label = QStringLiteral("LOAD INDX C %1").arg(QString::fromStdString(desc)); } break; - case OpcodeDecoder::GX_LOAD_INDX_D: + case Opcode::GX_LOAD_INDX_D: { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(&object[object_offset])); @@ -321,7 +317,7 @@ void FIFOAnalyzer::UpdateDetails() } break; - case OpcodeDecoder::GX_CMD_CALL_DL: + case Opcode::GX_CMD_CALL_DL: // The recorder should have expanded display lists into the fifo stream and skipped the // call to start them // That is done to make it easier to track where memory is updated @@ -330,7 +326,7 @@ void FIFOAnalyzer::UpdateDetails() new_label = QStringLiteral("CALL DL"); break; - case OpcodeDecoder::GX_LOAD_BP_REG: + case Opcode::GX_LOAD_BP_REG: { const u8 cmd2 = object[object_offset++]; const u32 cmddata = Common::swap24(&object[object_offset]); @@ -347,6 +343,8 @@ void FIFOAnalyzer::UpdateDetails() break; default: + { + const u8 command = static_cast(opcode); if ((command & 0xC0) == 0x80) { // Object primitive data @@ -392,6 +390,7 @@ void FIFOAnalyzer::UpdateDetails() } break; } + } new_label = QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) + new_label; m_detail_list->addItem(new_label); @@ -539,6 +538,8 @@ void FIFOAnalyzer::ShowSearchResult(size_t index) void FIFOAnalyzer::UpdateDescription() { + using OpcodeDecoder::Opcode; + m_entry_detail_browser->clear(); if (!FifoPlayer::GetInstance().IsPlaying()) @@ -563,11 +564,12 @@ void FIFOAnalyzer::UpdateDescription() const u32 entry_start = m_object_data_offsets[entry_nr]; const u8* cmddata = &fifo_frame.fifoData[object_start + 
entry_start]; + const Opcode opcode = static_cast(*cmddata); // TODO: Not sure whether we should bother translating the descriptions QString text; - if (*cmddata == OpcodeDecoder::GX_LOAD_BP_REG) + if (opcode == Opcode::GX_LOAD_BP_REG) { const u8 cmd = *(cmddata + 1); const u32 value = Common::swap24(cmddata + 2); @@ -584,7 +586,7 @@ void FIFOAnalyzer::UpdateDescription() else text += QString::fromStdString(desc); } - else if (*cmddata == OpcodeDecoder::GX_LOAD_CP_REG) + else if (opcode == Opcode::GX_LOAD_CP_REG) { const u8 cmd = *(cmddata + 1); const u32 value = Common::swap32(cmddata + 2); @@ -601,7 +603,7 @@ void FIFOAnalyzer::UpdateDescription() else text += QString::fromStdString(desc); } - else if (*cmddata == OpcodeDecoder::GX_LOAD_XF_REG) + else if (opcode == Opcode::GX_LOAD_XF_REG) { const auto [name, desc] = GetXFTransferInfo(cmddata + 1); ASSERT(!name.empty()); @@ -615,7 +617,7 @@ void FIFOAnalyzer::UpdateDescription() else text += QString::fromStdString(desc); } - else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_A) + else if (opcode == Opcode::GX_LOAD_INDX_A) { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(cmddata + 1)); @@ -625,7 +627,7 @@ void FIFOAnalyzer::UpdateDescription() text += QLatin1Char{'\n'}; text += QString::fromStdString(written); } - else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_B) + else if (opcode == Opcode::GX_LOAD_INDX_B) { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(cmddata + 1)); @@ -637,7 +639,7 @@ void FIFOAnalyzer::UpdateDescription() text += QLatin1Char{'\n'}; text += QString::fromStdString(written); } - else if (*cmddata == OpcodeDecoder::GX_LOAD_INDX_C) + else if (opcode == Opcode::GX_LOAD_INDX_C) { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(cmddata + 1)); @@ -648,7 +650,7 @@ void FIFOAnalyzer::UpdateDescription() text += QLatin1Char{'\n'}; text += QString::fromStdString(written); } - else if (*cmddata == 
OpcodeDecoder::GX_LOAD_INDX_D) + else if (opcode == Opcode::GX_LOAD_INDX_D) { const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(cmddata + 1)); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index 738162de80..a7d0506a1f 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -36,20 +36,21 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ { DebugUtil::OnObjectBegin(); - u8 primitiveType = 0; + using OpcodeDecoder::Primitive; + Primitive primitive_type = Primitive::GX_DRAW_QUADS; switch (m_current_primitive_type) { case PrimitiveType::Points: - primitiveType = OpcodeDecoder::GX_DRAW_POINTS; + primitive_type = Primitive::GX_DRAW_POINTS; break; case PrimitiveType::Lines: - primitiveType = OpcodeDecoder::GX_DRAW_LINES; + primitive_type = Primitive::GX_DRAW_LINES; break; case PrimitiveType::Triangles: - primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLES; + primitive_type = Primitive::GX_DRAW_TRIANGLES; break; case PrimitiveType::TriangleStrip: - primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP; + primitive_type = Primitive::GX_DRAW_TRIANGLE_STRIP; break; } @@ -57,7 +58,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ if (g_renderer->IsBBoxEnabled()) g_renderer->BBoxFlush(); - m_setup_unit.Init(primitiveType); + m_setup_unit.Init(primitive_type); // set all states with are stored within video sw for (int i = 0; i < 4; i++) diff --git a/Source/Core/VideoBackends/Software/SetupUnit.cpp b/Source/Core/VideoBackends/Software/SetupUnit.cpp index b2488a63d8..48ab2b2d9b 100644 --- a/Source/Core/VideoBackends/Software/SetupUnit.cpp +++ b/Source/Core/VideoBackends/Software/SetupUnit.cpp @@ -9,9 +9,9 @@ #include "VideoBackends/Software/Clipper.h" #include "VideoCommon/OpcodeDecoding.h" -void SetupUnit::Init(u8 primitiveType) +void 
SetupUnit::Init(OpcodeDecoder::Primitive primitive_type) { - m_PrimType = primitiveType; + m_PrimType = primitive_type; m_VertexCounter = 0; m_VertPointer[0] = &m_Vertices[0]; @@ -28,31 +28,32 @@ OutputVertexData* SetupUnit::GetVertex() void SetupUnit::SetupVertex() { + using OpcodeDecoder::Primitive; switch (m_PrimType) { - case OpcodeDecoder::GX_DRAW_QUADS: + case Primitive::GX_DRAW_QUADS: SetupQuad(); break; - case OpcodeDecoder::GX_DRAW_QUADS_2: + case Primitive::GX_DRAW_QUADS_2: WARN_LOG_FMT(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2"); SetupQuad(); break; - case OpcodeDecoder::GX_DRAW_TRIANGLES: + case Primitive::GX_DRAW_TRIANGLES: SetupTriangle(); break; - case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: + case Primitive::GX_DRAW_TRIANGLE_STRIP: SetupTriStrip(); break; - case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: + case Primitive::GX_DRAW_TRIANGLE_FAN: SetupTriFan(); break; - case OpcodeDecoder::GX_DRAW_LINES: + case Primitive::GX_DRAW_LINES: SetupLine(); break; - case OpcodeDecoder::GX_DRAW_LINE_STRIP: + case Primitive::GX_DRAW_LINE_STRIP: SetupLineStrip(); break; - case OpcodeDecoder::GX_DRAW_POINTS: + case Primitive::GX_DRAW_POINTS: SetupPoint(); break; } diff --git a/Source/Core/VideoBackends/Software/SetupUnit.h b/Source/Core/VideoBackends/Software/SetupUnit.h index a9f9584e07..e454c73ff3 100644 --- a/Source/Core/VideoBackends/Software/SetupUnit.h +++ b/Source/Core/VideoBackends/Software/SetupUnit.h @@ -6,9 +6,14 @@ #include "Common/CommonTypes.h" #include "VideoBackends/Software/NativeVertexFormat.h" +namespace OpcodeDecoder +{ +enum class Primitive : u8; +} + class SetupUnit { - u8 m_PrimType = 0; + OpcodeDecoder::Primitive m_PrimType{}; int m_VertexCounter = 0; OutputVertexData m_Vertices[3]; @@ -24,7 +29,7 @@ class SetupUnit void SetupPoint(); public: - void Init(u8 primitiveType); + void Init(OpcodeDecoder::Primitive primitive_type); OutputVertexData* GetVertex(); diff --git a/Source/Core/VideoCommon/IndexGenerator.cpp 
b/Source/Core/VideoCommon/IndexGenerator.cpp index d71f6c8292..be2dc99e3a 100644 --- a/Source/Core/VideoCommon/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/IndexGenerator.cpp @@ -202,25 +202,27 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index) void IndexGenerator::Init() { + using OpcodeDecoder::Primitive; + if (g_Config.backend_info.bSupportsPrimitiveRestart) { - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads; - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan; + m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads; + m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard; + m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan; } else { - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads; - m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip; - m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan; + m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads; + m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard; + m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip; + m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan; } - m_primitive_table[OpcodeDecoder::GX_DRAW_LINES] = AddLineList; - m_primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = AddLineStrip; - m_primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = AddPoints; + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList; + 
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip; + m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints; } void IndexGenerator::Start(u16* index_ptr) @@ -230,7 +232,7 @@ void IndexGenerator::Start(u16* index_ptr) m_base_index = 0; } -void IndexGenerator::AddIndices(int primitive, u32 num_vertices) +void IndexGenerator::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices) { m_index_buffer_current = m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index); diff --git a/Source/Core/VideoCommon/IndexGenerator.h b/Source/Core/VideoCommon/IndexGenerator.h index 00c8f73132..32cf21e207 100644 --- a/Source/Core/VideoCommon/IndexGenerator.h +++ b/Source/Core/VideoCommon/IndexGenerator.h @@ -6,8 +6,9 @@ #pragma once -#include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" +#include "VideoCommon/OpcodeDecoding.h" class IndexGenerator { @@ -15,7 +16,7 @@ public: void Init(); void Start(u16* index_ptr); - void AddIndices(int primitive, u32 num_vertices); + void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices); @@ -30,5 +31,5 @@ private: u32 m_base_index = 0; using PrimitiveFunction = u16* (*)(u16*, u32, u32); - std::array m_primitive_table{}; + Common::EnumMap m_primitive_table{}; }; diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index a1abacc4c6..362afd6a7a 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -102,18 +102,18 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) return finish_up(); const u8 cmd_byte = src.Read(); - switch (cmd_byte) + switch (static_cast(cmd_byte)) { - case GX_NOP: + case Opcode::GX_NOP: total_cycles += 6; // Hm, this means that we scan over nop streams pretty slowly... 
break; - case GX_UNKNOWN_RESET: + case Opcode::GX_UNKNOWN_RESET: total_cycles += 6; // Datel software uses this command DEBUG_LOG_FMT(VIDEO, "GX Reset?: {:08x}", cmd_byte); break; - case GX_LOAD_CP_REG: + case Opcode::GX_LOAD_CP_REG: { if (src.size() < 1 + 4) return finish_up(); @@ -128,7 +128,7 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) } break; - case GX_LOAD_XF_REG: + case Opcode::GX_LOAD_XF_REG: { if (src.size() < 4) return finish_up(); @@ -151,10 +151,10 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) } break; - case GX_LOAD_INDX_A: // Used for position matrices - case GX_LOAD_INDX_B: // Used for normal matrices - case GX_LOAD_INDX_C: // Used for postmatrices - case GX_LOAD_INDX_D: // Used for lights + case Opcode::GX_LOAD_INDX_A: // Used for position matrices + case Opcode::GX_LOAD_INDX_B: // Used for normal matrices + case Opcode::GX_LOAD_INDX_C: // Used for postmatrices + case Opcode::GX_LOAD_INDX_D: // Used for lights { if (src.size() < 4) return finish_up(); @@ -175,7 +175,7 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) } break; - case GX_CMD_CALL_DL: + case Opcode::GX_CMD_CALL_DL: { if (src.size() < 8) return finish_up(); @@ -198,18 +198,18 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) } break; - case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after - // that + case Opcode::GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics + // registers after that total_cycles += 6; DEBUG_LOG_FMT(VIDEO, "GX 0x44: {:08x}", cmd_byte); break; - case GX_CMD_INVL_VC: // Invalidate Vertex Cache + case Opcode::GX_CMD_INVL_VC: // Invalidate Vertex Cache total_cycles += 6; DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)"); break; - case GX_LOAD_BP_REG: + case Opcode::GX_LOAD_BP_REG: // In skipped_frame case: We have to let BP writes through because they set // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. 
{ @@ -242,7 +242,8 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) const u16 num_vertices = src.Read(); const int bytes = VertexLoaderManager::RunVertices( cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, num_vertices, src, is_preprocess); + static_cast((cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT), + num_vertices, src, is_preprocess); if (bytes < 0) return finish_up(); @@ -267,7 +268,7 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) // Display lists get added directly into the FIFO stream if constexpr (!is_preprocess) { - if (g_record_fifo_data && cmd_byte != GX_CMD_CALL_DL) + if (g_record_fifo_data && static_cast(cmd_byte) != Opcode::GX_CMD_CALL_DL) { const u8* const opcode_end = src.GetPointer(); FifoRecorder::GetInstance().WriteGPCommand(opcode_start, u32(opcode_end - opcode_start)); diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index 98e5a292de..df1059f221 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -4,6 +4,7 @@ #pragma once #include "Common/CommonTypes.h" +#include "Common/EnumFormatter.h" class DataReader; @@ -12,7 +13,7 @@ namespace OpcodeDecoder // Global flag to signal if FifoRecorder is active. extern bool g_record_fifo_data; -enum +enum class Opcode { GX_NOP = 0x00, GX_UNKNOWN_RESET = 0x01, @@ -27,20 +28,20 @@ enum GX_CMD_CALL_DL = 0x40, GX_CMD_UNKNOWN_METRICS = 0x44, - GX_CMD_INVL_VC = 0x48 + GX_CMD_INVL_VC = 0x48, + + GX_PRIMITIVE_START = 0x80, + GX_PRIMITIVE_END = 0xbf, }; -enum -{ - GX_PRIMITIVE_MASK = 0x78, - GX_PRIMITIVE_SHIFT = 3, - GX_VAT_MASK = 0x07 -}; +constexpr u8 GX_PRIMITIVE_MASK = 0x78; +constexpr u32 GX_PRIMITIVE_SHIFT = 3; +constexpr u8 GX_VAT_MASK = 0x07; // These values are the values extracted using GX_PRIMITIVE_MASK // and GX_PRIMITIVE_SHIFT. // GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS. 
-enum +enum class Primitive : u8 { GX_DRAW_QUADS = 0x0, // 0x80 GX_DRAW_QUADS_2 = 0x1, // 0x88 @@ -58,3 +59,16 @@ template u8* Run(DataReader src, u32* cycles, bool in_display_list); } // namespace OpcodeDecoder + +template <> +struct fmt::formatter + : EnumFormatter +{ + static constexpr array_type names = { + "GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)", + "GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP", + "GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES", + "GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS", + }; + formatter() : EnumFormatter(names) {} +}; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 33af56762f..b794c5dfc0 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -239,7 +239,8 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal return loader; } -int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess) +int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, + bool is_preprocess) { if (!count) return 0; @@ -266,7 +267,8 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. // They still need to go through vertex loading, because we need to calculate a zfreeze refrence // slope. 
- bool cullall = (bpmem.genMode.cullmode == CullMode::All && primitive < 5); + bool cullall = (bpmem.genMode.cullmode == CullMode::All && + primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); DataReader dst = g_vertex_manager->PrepareForAdditionalData( primitive, count, loader->m_native_vtx_decl.stride, cullall); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index b43f0d919f..bd9066c5bb 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -14,6 +14,11 @@ class DataReader; class NativeVertexFormat; struct PortableVertexDeclaration; +namespace OpcodeDecoder +{ +enum class Primitive : u8; +}; + namespace VertexLoaderManager { using NativeVertexFormatMap = @@ -35,7 +40,8 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl); // Returns -1 if buf_size is insufficient, else the amount of bytes consumed -int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess); +int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, + bool is_preprocess); NativeVertexFormat* GetCurrentVertexFormat(); diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 63213edd3a..5fa85b2761 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -10,6 +10,7 @@ #include "Common/BitSet.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Logging/Log.h" #include "Common/MathUtil.h" @@ -38,8 +39,10 @@ std::unique_ptr g_vertex_manager; +using OpcodeDecoder::Primitive; + // GX primitive -> RenderState primitive, no primitive restart -constexpr std::array primitive_from_gx{{ +constexpr Common::EnumMap primitive_from_gx{ 
PrimitiveType::Triangles, // GX_DRAW_QUADS PrimitiveType::Triangles, // GX_DRAW_QUADS_2 PrimitiveType::Triangles, // GX_DRAW_TRIANGLES @@ -48,10 +51,10 @@ constexpr std::array primitive_from_gx{{ PrimitiveType::Lines, // GX_DRAW_LINES PrimitiveType::Lines, // GX_DRAW_LINE_STRIP PrimitiveType::Points, // GX_DRAW_POINTS -}}; +}; // GX primitive -> RenderState primitive, using primitive restart -constexpr std::array primitive_from_gx_pr{{ +constexpr Common::EnumMap primitive_from_gx_pr{ PrimitiveType::TriangleStrip, // GX_DRAW_QUADS PrimitiveType::TriangleStrip, // GX_DRAW_QUADS_2 PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLES @@ -60,7 +63,7 @@ constexpr std::array primitive_from_gx_pr{{ PrimitiveType::Lines, // GX_DRAW_LINES PrimitiveType::Lines, // GX_DRAW_LINE_STRIP PrimitiveType::Points, // GX_DRAW_POINTS -}}; +}; // Due to the BT.601 standard which the GameCube is based on being a compromise // between PAL and NTSC, neither standard gets square pixels. They are each off @@ -107,13 +110,13 @@ u32 VertexManagerBase::GetRemainingSize() const return static_cast(m_end_buffer_pointer - m_cur_buffer_pointer); } -void VertexManagerBase::AddIndices(int primitive, u32 num_vertices) +void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices) { m_index_generator.AddIndices(primitive, num_vertices); } -DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride, - bool cullall) +DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, + u32 count, u32 stride, bool cullall) { // Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently. 
g_framebuffer_manager->FlushEFBPokes(); @@ -185,7 +188,7 @@ void VertexManagerBase::FlushData(u32 count, u32 stride) m_cur_buffer_pointer += count * stride; } -u32 VertexManagerBase::GetRemainingIndices(int primitive) const +u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const { const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen(); @@ -193,22 +196,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const { switch (primitive) { - case OpcodeDecoder::GX_DRAW_QUADS: - case OpcodeDecoder::GX_DRAW_QUADS_2: + case Primitive::GX_DRAW_QUADS: + case Primitive::GX_DRAW_QUADS_2: return index_len / 5 * 4; - case OpcodeDecoder::GX_DRAW_TRIANGLES: + case Primitive::GX_DRAW_TRIANGLES: return index_len / 4 * 3; - case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: + case Primitive::GX_DRAW_TRIANGLE_STRIP: return index_len / 1 - 1; - case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: + case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 6 * 4 + 1; - case OpcodeDecoder::GX_DRAW_LINES: + case Primitive::GX_DRAW_LINES: return index_len; - case OpcodeDecoder::GX_DRAW_LINE_STRIP: + case Primitive::GX_DRAW_LINE_STRIP: return index_len / 2 + 1; - case OpcodeDecoder::GX_DRAW_POINTS: + case Primitive::GX_DRAW_POINTS: return index_len; default: @@ -219,22 +222,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const { switch (primitive) { - case OpcodeDecoder::GX_DRAW_QUADS: - case OpcodeDecoder::GX_DRAW_QUADS_2: + case Primitive::GX_DRAW_QUADS: + case Primitive::GX_DRAW_QUADS_2: return index_len / 6 * 4; - case OpcodeDecoder::GX_DRAW_TRIANGLES: + case Primitive::GX_DRAW_TRIANGLES: return index_len; - case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP: + case Primitive::GX_DRAW_TRIANGLE_STRIP: return index_len / 3 + 2; - case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN: + case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 3 + 2; - case OpcodeDecoder::GX_DRAW_LINES: + case Primitive::GX_DRAW_LINES: return index_len; - case 
OpcodeDecoder::GX_DRAW_LINE_STRIP: + case Primitive::GX_DRAW_LINE_STRIP: return index_len / 2 + 1; - case OpcodeDecoder::GX_DRAW_POINTS: + case Primitive::GX_DRAW_POINTS: return index_len; default: diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index f41be70836..b3dd49aa61 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -35,6 +35,11 @@ enum TexelBufferFormat : u32 NUM_TEXEL_BUFFER_FORMATS }; +namespace OpcodeDecoder +{ +enum class Primitive : u8; +}; + class VertexManagerBase { private: @@ -93,8 +98,9 @@ public: virtual bool Initialize(); PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } - void AddIndices(int primitive, u32 num_vertices); - DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall); + void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); + DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, + bool cullall); void FlushData(u32 count, u32 stride); void Flush(); @@ -163,7 +169,7 @@ protected: virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex); u32 GetRemainingSize() const; - u32 GetRemainingIndices(int primitive) const; + u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const; void CalculateZSlope(NativeVertexFormat* format); void LoadTextures(); From 1914087998bc296208355dc4b1b2553430b081b2 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 20 Jun 2021 13:47:57 -0700 Subject: [PATCH 08/23] Create and use CPArray enum class --- Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp | 12 ++-- Source/Core/Core/FifoPlayer/FifoAnalyzer.h | 6 +- .../Core/FifoPlayer/FifoRecordAnalyzer.cpp | 16 +++--- .../Core/Core/FifoPlayer/FifoRecordAnalyzer.h | 6 +- Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp | 16 +++--- Source/Core/VideoCommon/CPMemory.cpp | 10 ++-- Source/Core/VideoCommon/CPMemory.h | 
55 +++++++++++++------ Source/Core/VideoCommon/OpcodeDecoding.cpp | 6 +- Source/Core/VideoCommon/VertexLoaderARM64.cpp | 26 +++++---- Source/Core/VideoCommon/VertexLoaderARM64.h | 6 +- .../Core/VideoCommon/VertexLoaderManager.cpp | 39 ++++++++----- Source/Core/VideoCommon/VertexLoaderManager.h | 3 +- Source/Core/VideoCommon/VertexLoaderX64.cpp | 17 +++--- Source/Core/VideoCommon/VertexLoaderX64.h | 3 +- .../Core/VideoCommon/VertexLoader_Color.cpp | 26 ++++----- .../Core/VideoCommon/VertexLoader_Normal.cpp | 10 ++-- .../VideoCommon/VertexLoader_Position.cpp | 4 +- .../VideoCommon/VertexLoader_TextCoord.cpp | 4 +- Source/Core/VideoCommon/XFMemory.h | 4 +- Source/Core/VideoCommon/XFStructs.cpp | 14 ++--- Source/Core/VideoCommon/XFStructs.h | 2 +- .../VideoCommon/VertexLoaderTest.cpp | 12 ++-- 22 files changed, 171 insertions(+), 126 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp index a2103bd8a8..ca9ed3574d 100644 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp @@ -190,11 +190,11 @@ u32 AnalyzeCommand(const u8* data, DecodeMode mode) { s_DrawingObject = false; - int array = 0xc + (cmd - static_cast(Opcode::GX_LOAD_INDX_A)) / 8; + CPArray array = static_cast(0xc + (cmd - static_cast(Opcode::GX_LOAD_INDX_A)) / 8); u32 value = ReadFifo32(data); if (mode == DecodeMode::Record) - FifoRecordAnalyzer::ProcessLoadIndexedXf(value, array); + FifoRecordAnalyzer::ProcessLoadIndexedXf(array, value); break; } @@ -238,8 +238,8 @@ u32 AnalyzeCommand(const u8* data, DecodeMode mode) { for (size_t i = 0; i < offsets.size(); ++i) { - FifoRecordAnalyzer::WriteVertexArray(static_cast(i), data + offsets[i], vertexSize, - numVertices); + FifoRecordAnalyzer::WriteVertexArray(static_cast(i), data + offsets[i], + vertexSize, numVertices); } } @@ -284,11 +284,11 @@ void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem) break; case ARRAY_BASE: - cpMem.arrayBases[subCmd & 
CP_ARRAY_MASK] = value; + cpMem.arrayBases[static_cast(subCmd & CP_ARRAY_MASK)] = value; break; case ARRAY_STRIDE: - cpMem.arrayStrides[subCmd & CP_ARRAY_MASK] = value & 0xFF; + cpMem.arrayStrides[static_cast(subCmd & CP_ARRAY_MASK)] = value & 0xFF; break; } } diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoAnalyzer.h index 4e167cbc9f..e9604ce918 100644 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.h +++ b/Source/Core/Core/FifoPlayer/FifoAnalyzer.h @@ -6,6 +6,8 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" + #include "VideoCommon/CPMemory.h" namespace FifoAnalyzer @@ -22,8 +24,8 @@ struct CPMemory { TVtxDesc vtxDesc; std::array vtxAttr; - std::array arrayBases{}; - std::array arrayStrides{}; + Common::EnumMap arrayBases{}; + Common::EnumMap arrayStrides{}; }; void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem); diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp index 1f9adcc54c..4ada443fcf 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp @@ -30,7 +30,7 @@ void FifoRecordAnalyzer::Initialize(const u32* cpMem) std::copy(strides_start, strides_end, s_CpMem.arrayStrides.begin()); } -void FifoRecordAnalyzer::ProcessLoadIndexedXf(u32 val, int array) +void FifoRecordAnalyzer::ProcessLoadIndexedXf(CPArray array, u32 val) { int index = val >> 16; int size = ((val >> 12) & 0xF) + 1; @@ -40,19 +40,19 @@ void FifoRecordAnalyzer::ProcessLoadIndexedXf(u32 val, int array) FifoRecorder::GetInstance().UseMemory(address, size * 4, MemoryUpdate::XF_DATA); } -void FifoRecordAnalyzer::WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize, +void FifoRecordAnalyzer::WriteVertexArray(CPArray arrayIndex, const u8* vertexData, int vertexSize, int numVertices) { // Skip if not indexed array VertexComponentFormat arrayType; - if (arrayIndex == ARRAY_POSITION) + if 
(arrayIndex == CPArray::Position) arrayType = s_CpMem.vtxDesc.low.Position; - else if (arrayIndex == ARRAY_NORMAL) + else if (arrayIndex == CPArray::Normal) arrayType = s_CpMem.vtxDesc.low.Normal; - else if (arrayIndex >= ARRAY_COLOR0 && arrayIndex < ARRAY_COLOR0 + NUM_COLOR_ARRAYS) - arrayType = s_CpMem.vtxDesc.low.Color[arrayIndex - ARRAY_COLOR0]; - else if (arrayIndex >= ARRAY_TEXCOORD0 && arrayIndex < ARRAY_TEXCOORD0 + NUM_TEXCOORD_ARRAYS) - arrayType = s_CpMem.vtxDesc.high.TexCoord[arrayIndex - ARRAY_TEXCOORD0]; + else if (arrayIndex >= CPArray::Color0 && arrayIndex <= CPArray::Color1) + arrayType = s_CpMem.vtxDesc.low.Color[u8(arrayIndex) - u8(CPArray::Color0)]; + else if (arrayIndex >= CPArray::TexCoord0 && arrayIndex <= CPArray::TexCoord7) + arrayType = s_CpMem.vtxDesc.high.TexCoord[u8(arrayIndex) - u8(CPArray::TexCoord0)]; else { PanicAlertFmt("Invalid arrayIndex {}", arrayIndex); diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h index 8c3bd00a86..d1ac21c09b 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h +++ b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h @@ -5,11 +5,13 @@ #include "Common/CommonTypes.h" +enum class CPArray : u8; + namespace FifoRecordAnalyzer { // Must call this before analyzing Fifo commands with FifoAnalyzer::AnalyzeCommand() void Initialize(const u32* cpMem); -void ProcessLoadIndexedXf(u32 val, int array); -void WriteVertexArray(int arrayIndex, const u8* vertexData, int vertexSize, int numVertices); +void ProcessLoadIndexedXf(CPArray array, u32 val); +void WriteVertexArray(CPArray arrayIndex, const u8* vertexData, int vertexSize, int numVertices); } // namespace FifoRecordAnalyzer diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index 21a4572d38..73ba823ec1 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -287,7 +287,7 @@ void 
FIFOAnalyzer::UpdateDetails() case Opcode::GX_LOAD_INDX_A: { const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(&object[object_offset])); + GetXFIndexedLoadInfo(CPArray::XF_A, Common::swap32(&object[object_offset])); object_offset += 4; new_label = QStringLiteral("LOAD INDX A %1").arg(QString::fromStdString(desc)); } @@ -295,7 +295,7 @@ void FIFOAnalyzer::UpdateDetails() case Opcode::GX_LOAD_INDX_B: { const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_B, Common::swap32(&object[object_offset])); + GetXFIndexedLoadInfo(CPArray::XF_B, Common::swap32(&object[object_offset])); object_offset += 4; new_label = QStringLiteral("LOAD INDX B %1").arg(QString::fromStdString(desc)); } @@ -303,7 +303,7 @@ void FIFOAnalyzer::UpdateDetails() case Opcode::GX_LOAD_INDX_C: { const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(&object[object_offset])); + GetXFIndexedLoadInfo(CPArray::XF_C, Common::swap32(&object[object_offset])); object_offset += 4; new_label = QStringLiteral("LOAD INDX C %1").arg(QString::fromStdString(desc)); } @@ -311,7 +311,7 @@ void FIFOAnalyzer::UpdateDetails() case Opcode::GX_LOAD_INDX_D: { const auto [desc, written] = - GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(&object[object_offset])); + GetXFIndexedLoadInfo(CPArray::XF_D, Common::swap32(&object[object_offset])); object_offset += 4; new_label = QStringLiteral("LOAD INDX D %1").arg(QString::fromStdString(desc)); } @@ -619,7 +619,7 @@ void FIFOAnalyzer::UpdateDescription() } else if (opcode == Opcode::GX_LOAD_INDX_A) { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_A, Common::swap32(cmddata + 1)); + const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_A, Common::swap32(cmddata + 1)); text = QString::fromStdString(desc); text += QLatin1Char{'\n'}; @@ -629,7 +629,7 @@ void FIFOAnalyzer::UpdateDescription() } else if (opcode == Opcode::GX_LOAD_INDX_B) { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_B, 
Common::swap32(cmddata + 1)); + const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_B, Common::swap32(cmddata + 1)); text = QString::fromStdString(desc); text += QLatin1Char{'\n'}; @@ -641,7 +641,7 @@ void FIFOAnalyzer::UpdateDescription() } else if (opcode == Opcode::GX_LOAD_INDX_C) { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_C, Common::swap32(cmddata + 1)); + const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_C, Common::swap32(cmddata + 1)); text = QString::fromStdString(desc); text += QLatin1Char{'\n'}; @@ -652,7 +652,7 @@ void FIFOAnalyzer::UpdateDescription() } else if (opcode == Opcode::GX_LOAD_INDX_D) { - const auto [desc, written] = GetXFIndexedLoadInfo(ARRAY_XF_D, Common::swap32(cmddata + 1)); + const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_D, Common::swap32(cmddata + 1)); text = QString::fromStdString(desc); text += QLatin1Char{'\n'}; diff --git a/Source/Core/VideoCommon/CPMemory.cpp b/Source/Core/VideoCommon/CPMemory.cpp index 1184e10ca3..afa354b4e4 100644 --- a/Source/Core/VideoCommon/CPMemory.cpp +++ b/Source/Core/VideoCommon/CPMemory.cpp @@ -62,11 +62,13 @@ std::pair GetCPRegInfo(u8 cmd, u32 value) return std::make_pair(fmt::format("CP_VAT_REG_C - Format {}", cmd & CP_VAT_MASK), fmt::to_string(UVAT_group2{.Hex = value})); case ARRAY_BASE: - return std::make_pair(fmt::format("ARRAY_BASE Array {}", cmd & CP_ARRAY_MASK), - fmt::format("Base address {:08x}", value)); + return std::make_pair( + fmt::format("ARRAY_BASE Array {}", static_cast(cmd & CP_ARRAY_MASK)), + fmt::format("Base address {:08x}", value)); case ARRAY_STRIDE: - return std::make_pair(fmt::format("ARRAY_STRIDE Array {}", cmd - ARRAY_STRIDE), - fmt::format("Stride {:02x}", value & 0xff)); + return std::make_pair( + fmt::format("ARRAY_STRIDE Array {}", static_cast(cmd & CP_ARRAY_MASK)), + fmt::format("Stride {:02x}", value & 0xff)); default: return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), ""); } diff 
--git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 81d0316cec..1a937d15c9 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -11,6 +11,7 @@ #include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/EnumFormatter.h" +#include "Common/EnumMap.h" #include "Common/MsgHandler.h" enum @@ -53,24 +54,46 @@ enum }; // Vertex array numbers -enum +enum class CPArray : u8 { - ARRAY_POSITION = 0, - ARRAY_NORMAL = 1, - ARRAY_COLOR0 = 2, - NUM_COLOR_ARRAYS = 2, - ARRAY_TEXCOORD0 = 4, - NUM_TEXCOORD_ARRAYS = 8, + Position = 0, + Normal = 1, - ARRAY_XF_A = 12, // Usually used for position matrices - ARRAY_XF_B = 13, // Usually used for normal matrices - ARRAY_XF_C = 14, // Usually used for tex coord matrices - ARRAY_XF_D = 15, // Usually used for light objects + Color0 = 2, + Color1 = 3, - // Number of arrays related to vertex components (position, normal, color, tex coord) - // Excludes the 4 arrays used for indexed XF loads - NUM_VERTEX_COMPONENT_ARRAYS = 12, + TexCoord0 = 4, + TexCoord1 = 5, + TexCoord2 = 6, + TexCoord3 = 7, + TexCoord4 = 8, + TexCoord5 = 9, + TexCoord6 = 10, + TexCoord7 = 11, + + XF_A = 12, // Usually used for position matrices + XF_B = 13, // Usually used for normal matrices + XF_C = 14, // Usually used for tex coord matrices + XF_D = 15, // Usually used for light objects }; +template <> +struct fmt::formatter : EnumFormatter +{ + static constexpr array_type names = {"Position", "Normal", "Color 0", "Color 1", + "Tex Coord 0", "Tex Coord 1", "Tex Coord 2", "Tex Coord 3", + "Tex Coord 4", "Tex Coord 5", "Tex Coord 6", "Tex Coord 7", + "XF A", "XF B", "XF C", "XF D"}; + formatter() : EnumFormatter(names) {} +}; +// Intended for offsetting from Color0/TexCoord0 +constexpr CPArray operator+(CPArray array, u8 offset) +{ + return static_cast(static_cast(array) + offset); +} + +// Number of arrays related to vertex components (position, normal, color, tex coord) +// Excludes 
the 4 arrays used for indexed XF loads +constexpr u8 NUM_VERTEX_COMPONENT_ARRAYS = 12; // Vertex components enum class VertexComponentFormat @@ -607,8 +630,8 @@ class VertexLoaderBase; // STATE_TO_SAVE struct CPState final { - u32 array_bases[CP_NUM_ARRAYS]{}; - u32 array_strides[CP_NUM_ARRAYS]{}; + Common::EnumMap array_bases; + Common::EnumMap array_strides; TMatrixIndexA matrix_index_a{}; TMatrixIndexB matrix_index_b{}; TVtxDesc vtx_desc; diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index 362afd6a7a..be879ddfbc 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -166,12 +166,12 @@ u8* Run(DataReader src, u32* cycles, bool in_display_list) // GX_LOAD_INDX_B (40) -> 0xD // GX_LOAD_INDX_C (48) -> 0xE // GX_LOAD_INDX_D (56) -> 0xF - const int ref_array = (cmd_byte / 8) + 8; + const auto array = static_cast((cmd_byte / 8) + 8); if constexpr (is_preprocess) - PreprocessIndexedXF(src.Read(), ref_array); + PreprocessIndexedXF(array, src.Read()); else - LoadIndexedXF(src.Read(), ref_array); + LoadIndexedXF(array, src.Read()); } break; diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index 71fc9e054c..6663e6c8ae 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -6,6 +6,7 @@ #include #include "Common/CommonTypes.h" +#include "VideoCommon/CPMemory.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/VertexLoaderManager.h" @@ -59,7 +60,7 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at WriteProtect(); } -void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute, ARM64Reg reg) +void VertexLoaderARM64::GetVertexAddr(CPArray array, VertexComponentFormat attribute, ARM64Reg reg) { if (IsIndexed(attribute)) { @@ -95,7 +96,7 @@ void VertexLoaderARM64::GetVertexAddr(int array, 
VertexComponentFormat attribute REV16(scratch1_reg, scratch1_reg); } - if (array == ARRAY_POSITION) + if (array == CPArray::Position) { EOR(scratch2_reg, scratch1_reg, attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) : @@ -103,17 +104,18 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute m_skip_vertex = CBZ(scratch2_reg); } - LDR(IndexType::Unsigned, scratch2_reg, stride_reg, array * 4); + LDR(IndexType::Unsigned, scratch2_reg, stride_reg, static_cast(array) * 4); MUL(scratch1_reg, scratch1_reg, scratch2_reg); - LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg, array * 8); + LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg, + static_cast(array) * 8); ADD(EncodeRegTo64(reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg)); } else ADD(reg, src_reg, m_src_ofs); } -s32 VertexLoaderARM64::GetAddressImm(int array, VertexComponentFormat attribute, +s32 VertexLoaderARM64::GetAddressImm(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align) { if (IsIndexed(attribute) || (m_src_ofs > 255 && (m_src_ofs & (align - 1)))) @@ -448,8 +450,8 @@ void VertexLoaderARM64::GenerateVertexLoader() int load_size = GetLoadSize(load_bytes); load_size <<= 3; - s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.low.Position, EncodeRegTo64(scratch1_reg), - load_size); + s32 offset = GetAddressImm(CPArray::Position, m_VtxDesc.low.Position, + EncodeRegTo64(scratch1_reg), load_size); ReadVertex(m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements, m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position, offset); } @@ -470,7 +472,7 @@ void VertexLoaderARM64::GenerateVertexLoader() int load_bytes = elem_size * 3; int load_size = GetLoadSize(load_bytes); - offset = GetAddressImm(ARRAY_NORMAL, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg), + offset = GetAddressImm(CPArray::Normal, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg), 
load_size << 3); if (offset == -1) @@ -488,7 +490,7 @@ void VertexLoaderARM64::GenerateVertexLoader() } } - for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++) { m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; @@ -501,7 +503,7 @@ void VertexLoaderARM64::GenerateVertexLoader() m_VtxAttr.GetColorFormat(i) == ColorFormat::RGBA4444) align = 2; - s32 offset = GetAddressImm(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i], + s32 offset = GetAddressImm(CPArray::Color0 + i, m_VtxDesc.low.Color[i], EncodeRegTo64(scratch1_reg), align); ReadColor(m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i), offset); m_native_vtx_decl.colors[i].components = 4; @@ -513,7 +515,7 @@ void VertexLoaderARM64::GenerateVertexLoader() } } - for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) { m_native_vtx_decl.texcoords[i].offset = m_dst_ofs; m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; @@ -527,7 +529,7 @@ void VertexLoaderARM64::GenerateVertexLoader() int load_size = GetLoadSize(load_bytes); load_size <<= 3; - s32 offset = GetAddressImm(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i], + s32 offset = GetAddressImm(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i], EncodeRegTo64(scratch1_reg), load_size); u8 scaling_exponent = m_VtxAttr.GetTexFrac(i); ReadVertex(m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements, diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.h b/Source/Core/VideoCommon/VertexLoaderARM64.h index a2190a6965..eccf3f0ad8 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.h +++ b/Source/Core/VideoCommon/VertexLoaderARM64.h @@ -11,6 +11,7 @@ class DataReader; enum class VertexComponentFormat; enum class ComponentFormat; enum class ColorFormat; +enum class CPArray : u8; class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock { @@ -25,8 +26,9 @@ private: 
u32 m_dst_ofs = 0; Arm64Gen::FixupBranch m_skip_vertex; Arm64Gen::ARM64FloatEmitter m_float_emit; - void GetVertexAddr(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg); - s32 GetAddressImm(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align); + void GetVertexAddr(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg); + s32 GetAddressImm(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, + u32 align); int ReadVertex(VertexComponentFormat attribute, ComponentFormat format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format, s32 offset = -1); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index b794c5dfc0..e2475d666f 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -14,6 +14,7 @@ #include "Common/Assert.h" #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/Logging/Log.h" #include "Core/DolphinAnalytics.h" @@ -48,7 +49,14 @@ static std::mutex s_vertex_loader_map_lock; static VertexLoaderMap s_vertex_loader_map; // TODO - change into array of pointers. Keep a map of all seen so far. -u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS]; +Common::EnumMap cached_arraybases; + +BitSet8 g_main_vat_dirty; +BitSet8 g_preprocess_vat_dirty; +bool g_bases_dirty; // Main only +u8 g_current_vat; // Main only +std::array g_main_vertex_loaders; +std::array g_preprocess_vertex_loaders; void Init() { @@ -80,24 +88,25 @@ void UpdateVertexArrayPointers() // 12 through 15 are used for loading data into xfmem. // We also only update the array base if the vertex description states we are going to use it. 
if (IsIndexed(g_main_cp_state.vtx_desc.low.Position)) - cached_arraybases[ARRAY_POSITION] = - Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_POSITION]); + cached_arraybases[CPArray::Position] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Position]); if (IsIndexed(g_main_cp_state.vtx_desc.low.Normal)) - cached_arraybases[ARRAY_NORMAL] = Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_NORMAL]); + cached_arraybases[CPArray::Normal] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Normal]); - for (size_t i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++) + for (u8 i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++) { if (IsIndexed(g_main_cp_state.vtx_desc.low.Color[i])) - cached_arraybases[ARRAY_COLOR0 + i] = - Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_COLOR0 + i]); + cached_arraybases[CPArray::Color0 + i] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Color0 + i]); } - for (size_t i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++) + for (u8 i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++) { if (IsIndexed(g_main_cp_state.vtx_desc.high.TexCoord[i])) - cached_arraybases[ARRAY_TEXCOORD0 + i] = - Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_TEXCOORD0 + i]); + cached_arraybases[CPArray::TexCoord0 + i] = + Memory::GetPointer(g_main_cp_state.array_bases[CPArray::TexCoord0 + i]); } g_main_cp_state.bases_dirty = false; @@ -398,13 +407,13 @@ void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess) // Pointers to vertex arrays in GC RAM case ARRAY_BASE: - state->array_bases[sub_cmd & CP_ARRAY_MASK] = + state->array_bases[static_cast(sub_cmd & CP_ARRAY_MASK)] = value & CommandProcessor::GetPhysicalAddressMask(); state->bases_dirty = true; break; case ARRAY_STRIDE: - state->array_strides[sub_cmd & CP_ARRAY_MASK] = value & 0xFF; + state->array_strides[static_cast(sub_cmd & CP_ARRAY_MASK)] = value & 0xFF; break; default: @@ -427,9 +436,9 @@ void FillCPMemoryArray(u32* 
memory) memory[CP_VAT_REG_C + i] = g_main_cp_state.vtx_attr[i].g2.Hex; } - for (int i = 0; i < CP_NUM_ARRAYS; ++i) + for (u8 i = 0; i < CP_NUM_ARRAYS; ++i) { - memory[ARRAY_BASE + i] = g_main_cp_state.array_bases[i]; - memory[ARRAY_STRIDE + i] = g_main_cp_state.array_strides[i]; + memory[ARRAY_BASE + i] = g_main_cp_state.array_bases[static_cast(i)]; + memory[ARRAY_STRIDE + i] = g_main_cp_state.array_strides[static_cast(i)]; } } diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index bd9066c5bb..4eeae12742 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -8,6 +8,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "VideoCommon/CPMemory.h" class DataReader; @@ -46,7 +47,7 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun NativeVertexFormat* GetCurrentVertexFormat(); // Resolved pointers to array bases. Used by vertex loaders. -extern u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS]; +extern Common::EnumMap cached_arraybases; void UpdateVertexArrayPointers(); // Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite). 
diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index b204e131d7..4022863e59 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -15,6 +15,7 @@ #include "Common/JitRegister.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" +#include "VideoCommon/CPMemory.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/VertexLoaderManager.h" @@ -54,7 +55,7 @@ VertexLoaderX64::VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att) JitRegister::Register(region, GetCodePtr(), name.c_str()); } -OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute) +OpArg VertexLoaderX64::GetVertexAddr(CPArray array, VertexComponentFormat attribute) { OpArg data = MDisp(src_reg, m_src_ofs); if (IsIndexed(attribute)) @@ -62,7 +63,7 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute) int bits = attribute == VertexComponentFormat::Index8 ? 8 : 16; LoadAndSwap(bits, scratch1, data); m_src_ofs += bits / 8; - if (array == ARRAY_POSITION) + if (array == CPArray::Position) { CMP(bits, R(scratch1), Imm8(-1)); m_skip_vertex = J_CC(CC_E, true); @@ -433,7 +434,7 @@ void VertexLoaderX64::GenerateVertexLoader() texmatidx_ofs[i] = m_src_ofs++; } - OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.low.Position); + OpArg data = GetVertexAddr(CPArray::Position, m_VtxDesc.low.Position); int pos_elements = m_VtxAttr.g0.PosElements == CoordComponentCount::XY ? 
2 : 3; ReadVertex(data, m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements, m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position); @@ -448,7 +449,7 @@ void VertexLoaderX64::GenerateVertexLoader() { if (!i || m_VtxAttr.g0.NormalIndex3) { - data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.low.Normal); + data = GetVertexAddr(CPArray::Normal, m_VtxDesc.low.Normal); int elem_size = GetElementSize(m_VtxAttr.g0.NormalFormat); data.AddMemOffset(i * elem_size * 3); } @@ -457,11 +458,11 @@ void VertexLoaderX64::GenerateVertexLoader() } } - for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++) { if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent) { - data = GetVertexAddr(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i]); + data = GetVertexAddr(CPArray::Color0 + i, m_VtxDesc.low.Color[i]); ReadColor(data, m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i)); m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].enable = true; @@ -472,12 +473,12 @@ void VertexLoaderX64::GenerateVertexLoader() } } - for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) + for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) { int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::ST ? 2 : 1; if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent) { - data = GetVertexAddr(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i]); + data = GetVertexAddr(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i]); u8 scaling_exponent = m_VtxAttr.GetTexFrac(i); ReadVertex(data, m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements, m_VtxDesc.low.TexMatIdx[i] ? 
2 : elements, m_VtxAttr.g0.ByteDequant, diff --git a/Source/Core/VideoCommon/VertexLoaderX64.h b/Source/Core/VideoCommon/VertexLoaderX64.h index 8a3fd5aa6b..6a0cf7b785 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.h +++ b/Source/Core/VideoCommon/VertexLoaderX64.h @@ -10,6 +10,7 @@ enum class VertexComponentFormat; enum class ComponentFormat; enum class ColorFormat; +enum class CPArray : u8; class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock { @@ -23,7 +24,7 @@ private: u32 m_src_ofs = 0; u32 m_dst_ofs = 0; Gen::FixupBranch m_skip_vertex; - Gen::OpArg GetVertexAddr(int array, VertexComponentFormat attribute); + Gen::OpArg GetVertexAddr(CPArray array, VertexComponentFormat attribute); int ReadVertex(Gen::OpArg data, VertexComponentFormat attribute, ComponentFormat format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format); diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 4e71889bbd..36939557b8 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -79,8 +79,8 @@ void Color_ReadIndex_16b_565(VertexLoader* loader) { const auto index = DataRead(); const u8* const address = - VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); u16 value; std::memcpy(&value, address, sizeof(u16)); @@ -92,8 +92,8 @@ template void Color_ReadIndex_24b_888(VertexLoader* loader) { const auto index = DataRead(); - const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + const u8* address = 
VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); SetCol(loader, Read24(address)); } @@ -101,18 +101,18 @@ template void Color_ReadIndex_32b_888x(VertexLoader* loader) { const auto index = DataRead(); - const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); SetCol(loader, Read24(address)); } template void Color_ReadIndex_16b_4444(VertexLoader* loader) { - auto const index = DataRead(); + const auto index = DataRead(); const u8* const address = - VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); u16 value; std::memcpy(&value, address, sizeof(u16)); @@ -124,8 +124,8 @@ template void Color_ReadIndex_24b_6666(VertexLoader* loader) { const auto index = DataRead(); - const u8* data = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]) - 1; + const u8* data = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]) - 1; const u32 val = Common::swap32(data); SetCol6666(loader, val); } @@ -134,8 +134,8 @@ template void Color_ReadIndex_32b_8888(VertexLoader* loader) { const auto index = DataRead(); - const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] + - (index * 
g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]); + const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); SetCol(loader, Read32(address)); } diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index f19f27eda3..254bcacff3 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -59,7 +59,7 @@ struct Normal_Direct { static void function([[maybe_unused]] VertexLoader* loader) { - auto const source = reinterpret_cast(DataGetPosition()); + const auto source = reinterpret_cast(DataGetPosition()); ReadIndirect(source); DataSkip(); } @@ -72,10 +72,10 @@ void Normal_Index_Offset() { static_assert(std::is_unsigned_v, "Only unsigned I is sane!"); - auto const index = DataRead(); - auto const data = reinterpret_cast( - VertexLoaderManager::cached_arraybases[ARRAY_NORMAL] + - (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); + const auto index = DataRead(); + const auto data = reinterpret_cast( + VertexLoaderManager::cached_arraybases[CPArray::Normal] + + (index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset); ReadIndirect(data); } diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 37b15de53c..0fe8e7ba72 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -60,8 +60,8 @@ void Pos_ReadIndex(VertexLoader* loader) const auto index = DataRead(); loader->m_vertexSkip = index == std::numeric_limits::max(); const auto data = - reinterpret_cast(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] + - (index * g_main_cp_state.array_strides[ARRAY_POSITION])); + 
reinterpret_cast(VertexLoaderManager::cached_arraybases[CPArray::Position] + + (index * g_main_cp_state.array_strides[CPArray::Position])); const auto scale = loader->m_posScale; DataReader dst(g_vertex_manager_write_ptr, nullptr); diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index f5741f6423..89891df5a8 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -55,8 +55,8 @@ void TexCoord_ReadIndex(VertexLoader* loader) const auto index = DataRead(); const auto data = reinterpret_cast( - VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + - (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + VertexLoaderManager::cached_arraybases[CPArray::TexCoord0 + loader->m_tcIndex] + + (index * g_main_cp_state.array_strides[CPArray::TexCoord0 + loader->m_tcIndex])); const auto scale = loader->m_tcScale[loader->m_tcIndex]; DataReader dst(g_vertex_manager_write_ptr, nullptr); diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h index 2a44497ec7..32c5dbb607 100644 --- a/Source/Core/VideoCommon/XFMemory.h +++ b/Source/Core/VideoCommon/XFMemory.h @@ -459,5 +459,5 @@ static_assert(sizeof(XFMemory) == sizeof(u32) * 0x1058); extern XFMemory xfmem; void LoadXFReg(u32 transferSize, u32 address, DataReader src); -void LoadIndexedXF(u32 val, int array); -void PreprocessIndexedXF(u32 val, int refarray); +void LoadIndexedXF(CPArray array, u32 val); +void PreprocessIndexedXF(CPArray array, u32 val); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index ebf9e8efab..c31b8cbb54 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -274,7 +274,7 @@ constexpr std::tuple ExtractIndexedXF(u32 val) } // TODO - verify that it is correct. Seems to work, though. 
-void LoadIndexedXF(u32 val, int refarray) +void LoadIndexedXF(CPArray array, u32 val) { const auto [index, address, size] = ExtractIndexedXF(val); // load stuff from array to address in xf mem @@ -287,8 +287,8 @@ void LoadIndexedXF(u32 val, int refarray) } else { - newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] + - g_main_cp_state.array_strides[refarray] * index); + newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] + + g_main_cp_state.array_strides[array] * index); } bool changed = false; for (u32 i = 0; i < size; ++i) @@ -307,12 +307,12 @@ void LoadIndexedXF(u32 val, int refarray) } } -void PreprocessIndexedXF(u32 val, int refarray) +void PreprocessIndexedXF(CPArray array, u32 val) { const auto [index, address, size] = ExtractIndexedXF(val); - const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] + - g_preprocess_cp_state.array_strides[refarray] * index); + const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[array] + + g_preprocess_cp_state.array_strides[array] * index); const size_t buf_size = size * sizeof(u32); Fifo::PushFifoAuxBuffer(new_data, buf_size); @@ -655,7 +655,7 @@ std::pair GetXFTransferInfo(const u8* data) return std::make_pair(fmt::to_string(name), fmt::to_string(desc)); } -std::pair GetXFIndexedLoadInfo(u8 array, u32 value) +std::pair GetXFIndexedLoadInfo(CPArray array, u32 value) { const auto [index, address, size] = ExtractIndexedXF(value); diff --git a/Source/Core/VideoCommon/XFStructs.h b/Source/Core/VideoCommon/XFStructs.h index 8f839baf97..7e1cc2c49f 100644 --- a/Source/Core/VideoCommon/XFStructs.h +++ b/Source/Core/VideoCommon/XFStructs.h @@ -12,4 +12,4 @@ std::pair GetXFRegInfo(u32 address, u32 value); std::string GetXFMemName(u32 address); std::string GetXFMemDescription(u32 address, u32 value); std::pair GetXFTransferInfo(const u8* data); -std::pair GetXFIndexedLoadInfo(u8 array, u32 value); +std::pair GetXFIndexedLoadInfo(CPArray array, 
u32 value); diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index 81b3e5ee53..e72fe28c29 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -174,8 +174,8 @@ TEST_P(VertexLoaderParamTest, PositionAll) Input(i); else Input(i); - VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); - g_main_cp_state.array_strides[ARRAY_POSITION] = elem_count * elem_size; + VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Position] = elem_count * elem_size; } CreateAndCheckSizes(input_size, elem_count * sizeof(float)); for (float value : values) @@ -243,8 +243,8 @@ TEST_F(VertexLoaderTest, PositionIndex16FloatXY) CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float)); Input(1); Input(0); - VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); - g_main_cp_state.array_strides[ARRAY_POSITION] = sizeof(float); // ;) + VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer(); + g_main_cp_state.array_strides[CPArray::Position] = sizeof(float); // ;) Input(1.f); Input(2.f); Input(3.f); @@ -357,8 +357,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) for (int i = 0; i < NUM_VERTEX_COMPONENT_ARRAYS; i++) { - VertexLoaderManager::cached_arraybases[i] = m_src.GetPointer(); - g_main_cp_state.array_strides[i] = 129; + VertexLoaderManager::cached_arraybases[static_cast(i)] = m_src.GetPointer(); + g_main_cp_state.array_strides[static_cast(i)] = 129; } // This test is only done 100x in a row since it's ~20x slower using the From d5cfac71d09c4a751af607f16ba9c56755054d0a Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Wed, 21 Apr 2021 18:06:33 -0700 Subject: [PATCH 09/23] Refactor object listing code This also adds the commands after the last primitive data but before the next frame as a unique object; this is mainly just the XFB 
copy. It's nice to have these visible, though disabling the object does nothing since only primitive data is disabled and there is no primitive data in this case. --- .../Core/FifoPlayer/FifoPlaybackAnalyzer.cpp | 38 +++---- .../Core/FifoPlayer/FifoPlaybackAnalyzer.h | 33 +++++- Source/Core/Core/FifoPlayer/FifoPlayer.cpp | 89 ++++++--------- Source/Core/Core/FifoPlayer/FifoPlayer.h | 3 +- Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp | 103 +++++++++++++----- Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h | 10 +- 6 files changed, 163 insertions(+), 113 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp index d5ddf4310f..b5dcb5cd8c 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp @@ -46,7 +46,9 @@ void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, s_DrawingObject = false; u32 cmdStart = 0; - u32 nextMemUpdate = 0; + + u32 part_start = 0; + FifoAnalyzer::CPMemory cpmem; #if LOG_FIFO_CMDS // Debugging @@ -55,14 +57,6 @@ void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, while (cmdStart < frame.fifoData.size()) { - // Add memory updates that have occurred before this point in the frame - while (nextMemUpdate < frame.memoryUpdates.size() && - frame.memoryUpdates[nextMemUpdate].fifoPosition <= cmdStart) - { - analyzed.memoryUpdates.push_back(frame.memoryUpdates[nextMemUpdate]); - ++nextMemUpdate; - } - const bool wasDrawing = s_DrawingObject; const u32 cmdSize = FifoAnalyzer::AnalyzeCommand(&frame.fifoData[cmdStart], DecodeMode::Playback); @@ -79,9 +73,7 @@ void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, if (cmdSize == 0) { // Clean up frame analysis - analyzed.objectStarts.clear(); - analyzed.objectCPStates.clear(); - analyzed.objectEnds.clear(); + analyzed.parts.clear(); return; } @@ -90,22 +82,28 @@ void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, { if (s_DrawingObject) { - 
analyzed.objectStarts.push_back(cmdStart); - analyzed.objectCPStates.push_back(s_CpMem); + // Start of primitive data for an object + analyzed.AddPart(FramePartType::Commands, part_start, cmdStart, s_CpMem); + part_start = cmdStart; + // Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after + // primitive data, and the first opcode might update cpmem + std::memcpy(&cpmem, &s_CpMem, sizeof(FifoAnalyzer::CPMemory)); } else { - analyzed.objectEnds.push_back(cmdStart); + // End of primitive data for an object, and thus end of the object + analyzed.AddPart(FramePartType::PrimitiveData, part_start, cmdStart, cpmem); + part_start = cmdStart; } } cmdStart += cmdSize; } - if (analyzed.objectEnds.size() < analyzed.objectStarts.size()) - analyzed.objectEnds.push_back(cmdStart); - - ASSERT(analyzed.objectStarts.size() == analyzed.objectCPStates.size()); - ASSERT(analyzed.objectStarts.size() == analyzed.objectEnds.size()); + if (part_start != cmdStart) + { + // Remaining data, usually without any primitives + analyzed.AddPart(FramePartType::Commands, part_start, cmdStart, s_CpMem); + } } } diff --git a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h index 78e4c6e7d8..071279c885 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h +++ b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h @@ -9,14 +9,35 @@ #include "Core/FifoPlayer/FifoAnalyzer.h" #include "Core/FifoPlayer/FifoDataFile.h" +enum class FramePartType +{ + Commands, + PrimitiveData, +}; + +struct FramePart +{ + constexpr FramePart(FramePartType type, u32 start, u32 end, const FifoAnalyzer::CPMemory& cpmem) + : m_type(type), m_start(start), m_end(end), m_cpmem(cpmem) + { + } + + const FramePartType m_type; + const u32 m_start; + const u32 m_end; + const FifoAnalyzer::CPMemory m_cpmem; +}; + struct AnalyzedFrameInfo { - // Start of the primitives for the object (after previous update commands) - std::vector objectStarts; 
- std::vector objectCPStates; - // End of the primitives for the object - std::vector objectEnds; - std::vector memoryUpdates; + std::vector parts; + Common::EnumMap part_type_counts; + + void AddPart(FramePartType type, u32 start, u32 end, const FifoAnalyzer::CPMemory& cpmem) + { + parts.emplace_back(type, start, end, cpmem); + part_type_counts[type]++; + } }; namespace FifoPlaybackAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index 47b9ac2f87..cae033aa93 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -191,7 +191,7 @@ u32 FifoPlayer::GetMaxObjectCount() const u32 result = 0; for (auto& frame : m_FrameInfo) { - const u32 count = static_cast(frame.objectStarts.size()); + const u32 count = frame.part_type_counts[FramePartType::PrimitiveData]; if (count > result) result = count; } @@ -202,7 +202,7 @@ u32 FifoPlayer::GetFrameObjectCount(u32 frame) const { if (frame < m_FrameInfo.size()) { - return static_cast(m_FrameInfo[frame].objectStarts.size()); + return m_FrameInfo[frame].part_type_counts[FramePartType::PrimitiveData]; } return 0; @@ -262,55 +262,35 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& m_ElapsedCycles = 0; m_FrameFifoSize = static_cast(frame.fifoData.size()); - // Determine start and end objects - u32 numObjects = (u32)(info.objectStarts.size()); - u32 drawStart = std::min(numObjects, m_ObjectRangeStart); - u32 drawEnd = std::min(numObjects - 1, m_ObjectRangeEnd); + u32 memory_update = 0; + u32 object_num = 0; - u32 position = 0; - u32 memoryUpdate = 0; - - // Skip memory updates during frame if true + // Skip all memory updates if early memory updates are enabled, as we already wrote them if (m_EarlyMemoryUpdates) { - memoryUpdate = (u32)(frame.memoryUpdates.size()); + memory_update = (u32)(frame.memoryUpdates.size()); } - if (numObjects > 0) + for (const FramePart& part : info.parts) { - u32 
objectNum = 0; + bool show_part; - // Write fifo data skipping objects before the draw range - while (objectNum < drawStart) + if (part.m_type == FramePartType::PrimitiveData) { - WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info); - - position = info.objectEnds[objectNum]; - ++objectNum; + show_part = m_ObjectRangeStart <= object_num && object_num <= m_ObjectRangeEnd; + object_num++; + } + else + { + // We always include commands and EFB copies, as commands from earlier objects still apply to + // later ones (games generally do not reconfigure everything for each object) + show_part = true; } - // Write objects in draw range - if (objectNum < numObjects && drawStart <= drawEnd) - { - objectNum = drawEnd; - WriteFramePart(position, info.objectEnds[objectNum], memoryUpdate, frame, info); - position = info.objectEnds[objectNum]; - ++objectNum; - } - - // Write fifo data skipping objects after the draw range - while (objectNum < numObjects) - { - WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info); - - position = info.objectEnds[objectNum]; - ++objectNum; - } + if (show_part) + WriteFramePart(part, &memory_update, frame); } - // Write data after the last object - WriteFramePart(position, static_cast(frame.fifoData.size()), memoryUpdate, frame, info); - FlushWGP(); // Sleep while the GPU is active @@ -321,36 +301,39 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& } } -void FifoPlayer::WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, - const FifoFrameInfo& frame, const AnalyzedFrameInfo& info) +void FifoPlayer::WriteFramePart(const FramePart& part, u32* next_mem_update, + const FifoFrameInfo& frame) { const u8* const data = frame.fifoData.data(); - while (nextMemUpdate < frame.memoryUpdates.size() && dataStart < dataEnd) - { - const MemoryUpdate& memUpdate = info.memoryUpdates[nextMemUpdate]; + u32 data_start = part.m_start; + const u32 data_end = part.m_end; - 
if (memUpdate.fifoPosition < dataEnd) + while (*next_mem_update < frame.memoryUpdates.size() && data_start < data_end) + { + const MemoryUpdate& memUpdate = frame.memoryUpdates[*next_mem_update]; + + if (memUpdate.fifoPosition < data_end) { - if (dataStart < memUpdate.fifoPosition) + if (data_start < memUpdate.fifoPosition) { - WriteFifo(data, dataStart, memUpdate.fifoPosition); - dataStart = memUpdate.fifoPosition; + WriteFifo(data, data_start, memUpdate.fifoPosition); + data_start = memUpdate.fifoPosition; } WriteMemory(memUpdate); - ++nextMemUpdate; + ++*next_mem_update; } else { - WriteFifo(data, dataStart, dataEnd); - dataStart = dataEnd; + WriteFifo(data, data_start, data_end); + data_start = data_end; } } - if (dataStart < dataEnd) - WriteFifo(data, dataStart, dataEnd); + if (data_start < data_end) + WriteFifo(data, data_start, data_end); } void FifoPlayer::WriteAllMemoryUpdates() diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.h b/Source/Core/Core/FifoPlayer/FifoPlayer.h index 01ce07c4a0..8083612658 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.h +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.h @@ -108,8 +108,7 @@ private: CPU::State AdvanceFrame(); void WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& info); - void WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, const FifoFrameInfo& frame, - const AnalyzedFrameInfo& info); + void WriteFramePart(const FramePart& part, u32* next_mem_update, const FifoFrameInfo& frame); void WriteAllMemoryUpdates(); void WriteMemory(const MemoryUpdate& memUpdate); diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index 73ba823ec1..223fdd7145 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -3,6 +3,8 @@ #include "DolphinQt/FIFO/FIFOAnalyzer.h" +#include + #include #include #include @@ -27,8 +29,12 @@ #include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/XFStructs.h" 
+// Values range from 0 to number of frames - 1 constexpr int FRAME_ROLE = Qt::UserRole; -constexpr int OBJECT_ROLE = Qt::UserRole + 1; +// Values range from 0 to number of parts - 1 +constexpr int PART_START_ROLE = Qt::UserRole + 1; +// Values range from 1 to number of parts +constexpr int PART_END_ROLE = Qt::UserRole + 2; FIFOAnalyzer::FIFOAnalyzer() { @@ -144,22 +150,58 @@ void FIFOAnalyzer::UpdateTree() auto* file = FifoPlayer::GetInstance().GetFile(); const u32 frame_count = file->GetFrameCount(); + for (u32 frame = 0; frame < frame_count; frame++) { auto* frame_item = new QTreeWidgetItem({tr("Frame %1").arg(frame)}); recording_item->addChild(frame_item); - const u32 object_count = FifoPlayer::GetInstance().GetFrameObjectCount(frame); - for (u32 object = 0; object < object_count; object++) - { - auto* object_item = new QTreeWidgetItem({tr("Object %1").arg(object)}); + const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame); + ASSERT(frame_info.parts.size() != 0); + Common::EnumMap part_counts; + u32 part_start = 0; + + for (u32 part_nr = 0; part_nr < frame_info.parts.size(); part_nr++) + { + const auto& part = frame_info.parts[part_nr]; + + const u32 part_type_nr = part_counts[part.m_type]; + part_counts[part.m_type]++; + + QTreeWidgetItem* object_item = nullptr; + if (part.m_type == FramePartType::PrimitiveData) + object_item = new QTreeWidgetItem({tr("Object %1").arg(part_type_nr)}); + // We don't create dedicated labels for FramePartType::Command; + // those are grouped with the primitive + + if (object_item != nullptr) + { + frame_item->addChild(object_item); + + object_item->setData(0, FRAME_ROLE, frame); + object_item->setData(0, PART_START_ROLE, part_start); + object_item->setData(0, PART_END_ROLE, part_nr); + + part_start = part_nr + 1; + } + } + + // Final data (the XFB copy) + if (part_start != frame_info.parts.size()) + { + QTreeWidgetItem* object_item = new QTreeWidgetItem({tr("Final Data")}); 
frame_item->addChild(object_item); object_item->setData(0, FRAME_ROLE, frame); - object_item->setData(0, OBJECT_ROLE, object); + object_item->setData(0, PART_START_ROLE, part_start); + object_item->setData(0, PART_END_ROLE, u32(frame_info.parts.size() - 1)); } + + // The counts we computed should match the frame's counts + ASSERT(std::equal(frame_info.part_type_counts.begin(), frame_info.part_type_counts.end(), + part_counts.begin())); } } @@ -196,19 +238,19 @@ void FIFOAnalyzer::UpdateDetails() const auto items = m_tree_widget->selectedItems(); - if (items.isEmpty() || items[0]->data(0, OBJECT_ROLE).isNull()) + if (items.isEmpty() || items[0]->data(0, PART_START_ROLE).isNull()) return; const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); - const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); + const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt(); + const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt(); - const auto& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); + const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const auto& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); - // Note that frame_info.objectStarts[object_nr] is the start of the primitive data, - // but we want to start with the register updates which happen before that. - const u32 object_start = (object_nr == 0 ? 
0 : frame_info.objectEnds[object_nr - 1]); - const u32 object_size = frame_info.objectEnds[object_nr] - object_start; + const u32 object_start = frame_info.parts[start_part_nr].m_start; + const u32 object_end = frame_info.parts[end_part_nr].m_end; + const u32 object_size = object_end - object_start; const u8* const object = &fifo_frame.fifoData[object_start]; @@ -348,10 +390,9 @@ void FIFOAnalyzer::UpdateDetails() if ((command & 0xC0) == 0x80) { // Object primitive data - const u8 vat = command & OpcodeDecoder::GX_VAT_MASK; - const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc; - const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat]; + const auto& vtx_desc = frame_info.parts[end_part_nr].m_cpmem.vtxDesc; + const auto& vtx_attr = frame_info.parts[end_part_nr].m_cpmem.vtxAttr[vat]; const auto name = GetPrimitiveName(command); @@ -396,8 +437,6 @@ void FIFOAnalyzer::UpdateDetails() m_detail_list->addItem(new_label); } - ASSERT(object_offset == object_size); - // Needed to ensure the description updates when changing objects m_detail_list->setCurrentRow(0); } @@ -412,12 +451,15 @@ void FIFOAnalyzer::BeginSearch() const auto items = m_tree_widget->selectedItems(); if (items.isEmpty() || items[0]->data(0, FRAME_ROLE).isNull() || - items[0]->data(0, OBJECT_ROLE).isNull()) + items[0]->data(0, PART_START_ROLE).isNull()) { m_search_label->setText(tr("Invalid search parameters (no object selected)")); return; } + // Having PART_START_ROLE indicates that this is valid + const int object_idx = items[0]->parent()->indexOfChild(items[0]); + // TODO: Remove even string length limit if (search_str.length() % 2) { @@ -448,13 +490,15 @@ void FIFOAnalyzer::BeginSearch() m_search_results.clear(); const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); - const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); + const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt(); + const u32 end_part_nr = items[0]->data(0, 
PART_END_ROLE).toUInt(); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); - const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]); - const u32 object_size = frame_info.objectEnds[object_nr] - object_start; + const u32 object_start = frame_info.parts[start_part_nr].m_start; + const u32 object_end = frame_info.parts[end_part_nr].m_end; + const u32 object_size = object_end - object_start; const u8* const object = &fifo_frame.fifoData[object_start]; @@ -473,7 +517,7 @@ void FIFOAnalyzer::BeginSearch() { if (std::equal(search_val.begin(), search_val.end(), ptr)) { - m_search_results.emplace_back(frame_nr, object_nr, cmd_nr); + m_search_results.emplace_back(frame_nr, object_idx, cmd_nr); break; } } @@ -527,7 +571,7 @@ void FIFOAnalyzer::ShowSearchResult(size_t index) const auto& result = m_search_results[index]; QTreeWidgetItem* object_item = - m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object); + m_tree_widget->topLevelItem(0)->child(result.m_frame)->child(result.m_object_idx); m_tree_widget->setCurrentItem(object_item); m_detail_list->setCurrentRow(result.m_cmd); @@ -550,17 +594,18 @@ void FIFOAnalyzer::UpdateDescription() if (items.isEmpty() || m_object_data_offsets.empty()) return; - if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, OBJECT_ROLE).isNull()) + if (items[0]->data(0, FRAME_ROLE).isNull() || items[0]->data(0, PART_START_ROLE).isNull()) return; const u32 frame_nr = items[0]->data(0, FRAME_ROLE).toUInt(); - const u32 object_nr = items[0]->data(0, OBJECT_ROLE).toUInt(); + const u32 start_part_nr = items[0]->data(0, PART_START_ROLE).toUInt(); + const u32 end_part_nr = items[0]->data(0, PART_END_ROLE).toUInt(); const u32 entry_nr = m_detail_list->currentRow(); const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame_nr); 
const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); - const u32 object_start = (object_nr == 0 ? 0 : frame_info.objectEnds[object_nr - 1]); + const u32 object_start = frame_info.parts[start_part_nr].m_start; const u32 entry_start = m_object_data_offsets[entry_nr]; const u8* cmddata = &fifo_frame.fifoData[object_start + entry_start]; @@ -671,8 +716,8 @@ void FIFOAnalyzer::UpdateDescription() text = tr("Primitive %1").arg(name); text += QLatin1Char{'\n'}; - const auto& vtx_desc = frame_info.objectCPStates[object_nr].vtxDesc; - const auto& vtx_attr = frame_info.objectCPStates[object_nr].vtxAttr[vat]; + const auto& vtx_desc = frame_info.parts[end_part_nr].m_cpmem.vtxDesc; + const auto& vtx_attr = frame_info.parts[end_part_nr].m_cpmem.vtxAttr[vat]; const auto component_sizes = VertexLoaderBase::GetVertexComponentSizes(vtx_desc, vtx_attr); u32 i = 3; diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h index 6a1c0a948a..222ce8e06b 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h @@ -58,15 +58,19 @@ private: struct SearchResult { - constexpr SearchResult(u32 frame, u32 object, u32 cmd) - : m_frame(frame), m_object(object), m_cmd(cmd) + constexpr SearchResult(u32 frame, u32 object_idx, u32 cmd) + : m_frame(frame), m_object_idx(object_idx), m_cmd(cmd) { } const u32 m_frame; - const u32 m_object; + // Index in tree view. Does not correspond with object numbers or part numbers. + const u32 m_object_idx; const u32 m_cmd; }; + // Offsets from the start of the first part in an object for each command within the currently + // selected object. 
std::vector m_object_data_offsets; + std::vector m_search_results; }; From 04418262063bf6034bc2be172a64eb99a9e9a2bb Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 22 Apr 2021 18:00:24 -0700 Subject: [PATCH 10/23] Fix wrapping in FifoPlayer comment --- Source/Core/Core/FifoPlayer/FifoPlayer.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.h b/Source/Core/Core/FifoPlayer/FifoPlayer.h index 8083612658..96a170810c 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.h +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.h @@ -43,12 +43,10 @@ enum class State; // 8. The output of fifoplayer would be wrong. // To keep compatibility with old fifologs, we have this flag which signals texture cache to not -// bother -// hashing the memory and just assume the hash matched. +// bother hashing the memory and just assume the hash matched. // At a later point proper efb copy support should be added to fiforecorder and this flag will -// change -// based on the version of the .dff file, but until then it will always be true when a fifolog is -// playing. +// change based on the version of the .dff file, but until then it will always be true when a +// fifolog is playing. 
// Shitty global to fix a shitty problem extern bool IsPlayingBackFifologWithBrokenEFBCopies; From b5fd35f95145ecc8f88a179229ed69b390eb76be Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 22 Apr 2021 20:57:56 -0700 Subject: [PATCH 11/23] Refactor OpcodeDecoding and FIFO analyzer to use callbacks --- Source/Core/Core/CMakeLists.txt | 6 - Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp | 295 --------- Source/Core/Core/FifoPlayer/FifoAnalyzer.h | 35 -- .../Core/FifoPlayer/FifoPlaybackAnalyzer.cpp | 109 ---- .../Core/FifoPlayer/FifoPlaybackAnalyzer.h | 46 -- Source/Core/Core/FifoPlayer/FifoPlayer.cpp | 117 +++- Source/Core/Core/FifoPlayer/FifoPlayer.h | 37 +- .../Core/FifoPlayer/FifoRecordAnalyzer.cpp | 103 --- .../Core/Core/FifoPlayer/FifoRecordAnalyzer.h | 17 - Source/Core/Core/FifoPlayer/FifoRecorder.cpp | 163 ++++- Source/Core/Core/FifoPlayer/FifoRecorder.h | 4 + Source/Core/DolphinLib.props | 6 - Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp | 589 ++++++++---------- .../Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp | 1 - Source/Core/VideoCommon/BPMemory.h | 4 +- Source/Core/VideoCommon/BPStructs.cpp | 22 +- Source/Core/VideoCommon/CPMemory.cpp | 169 ++++- Source/Core/VideoCommon/CPMemory.h | 18 +- Source/Core/VideoCommon/CommandProcessor.cpp | 7 +- Source/Core/VideoCommon/CommandProcessor.h | 2 +- Source/Core/VideoCommon/Fifo.cpp | 14 +- Source/Core/VideoCommon/OpcodeDecoding.cpp | 424 ++++++------- Source/Core/VideoCommon/OpcodeDecoding.h | 222 ++++++- Source/Core/VideoCommon/RenderBase.cpp | 2 +- Source/Core/VideoCommon/VertexLoaderARM64.cpp | 2 +- .../Core/VideoCommon/VertexLoaderManager.cpp | 147 ----- Source/Core/VideoCommon/XFMemory.h | 6 +- Source/Core/VideoCommon/XFStructs.cpp | 29 +- Source/Core/VideoCommon/XFStructs.h | 6 +- 29 files changed, 1214 insertions(+), 1388 deletions(-) delete mode 100644 Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp delete mode 100644 Source/Core/Core/FifoPlayer/FifoAnalyzer.h delete mode 100644 
Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp delete mode 100644 Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h delete mode 100644 Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp delete mode 100644 Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index a06a391c06..c0608572df 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -103,16 +103,10 @@ add_library(core DSP/LabelMap.h DSPEmulator.cpp DSPEmulator.h - FifoPlayer/FifoAnalyzer.cpp - FifoPlayer/FifoAnalyzer.h FifoPlayer/FifoDataFile.cpp FifoPlayer/FifoDataFile.h - FifoPlayer/FifoPlaybackAnalyzer.cpp - FifoPlayer/FifoPlaybackAnalyzer.h FifoPlayer/FifoPlayer.cpp FifoPlayer/FifoPlayer.h - FifoPlayer/FifoRecordAnalyzer.cpp - FifoPlayer/FifoRecordAnalyzer.h FifoPlayer/FifoRecorder.cpp FifoPlayer/FifoRecorder.h FreeLookConfig.cpp diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp deleted file mode 100644 index ca9ed3574d..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "Core/FifoPlayer/FifoAnalyzer.h" - -#include - -#include "Common/Assert.h" -#include "Common/MsgHandler.h" -#include "Common/Swap.h" - -#include "Core/FifoPlayer/FifoRecordAnalyzer.h" - -#include "VideoCommon/OpcodeDecoding.h" -#include "VideoCommon/VertexLoader.h" -#include "VideoCommon/VertexLoader_Normal.h" -#include "VideoCommon/VertexLoader_Position.h" -#include "VideoCommon/VertexLoader_TextCoord.h" - -namespace FifoAnalyzer -{ -namespace -{ -u8 ReadFifo8(const u8*& data) -{ - const u8 value = data[0]; - data += 1; - return value; -} - -u16 ReadFifo16(const u8*& data) -{ - const u16 value = Common::swap16(data); - data += 2; - return value; -} - -u32 ReadFifo32(const u8*& data) -{ - const u32 value = Common::swap32(data); - 
data += 4; - return value; -} - -std::array CalculateVertexElementSizes(int vatIndex, const CPMemory& cpMem) -{ - const TVtxDesc& vtxDesc = cpMem.vtxDesc; - const VAT& vtxAttr = cpMem.vtxAttr[vatIndex]; - - // Colors - const std::array colComp{ - vtxAttr.g0.Color0Comp, - vtxAttr.g0.Color1Comp, - }; - - const std::array tcElements{ - vtxAttr.g0.Tex0CoordElements, vtxAttr.g1.Tex1CoordElements, vtxAttr.g1.Tex2CoordElements, - vtxAttr.g1.Tex3CoordElements, vtxAttr.g1.Tex4CoordElements, vtxAttr.g2.Tex5CoordElements, - vtxAttr.g2.Tex6CoordElements, vtxAttr.g2.Tex7CoordElements, - }; - const std::array tcFormat{ - vtxAttr.g0.Tex0CoordFormat, vtxAttr.g1.Tex1CoordFormat, vtxAttr.g1.Tex2CoordFormat, - vtxAttr.g1.Tex3CoordFormat, vtxAttr.g1.Tex4CoordFormat, vtxAttr.g2.Tex5CoordFormat, - vtxAttr.g2.Tex6CoordFormat, vtxAttr.g2.Tex7CoordFormat, - }; - - std::array sizes{}; - - // Add position and texture matrix indices - sizes[0] = vtxDesc.low.PosMatIdx; - for (size_t i = 0; i < vtxDesc.low.TexMatIdx.Size(); ++i) - { - sizes[i + 1] = vtxDesc.low.TexMatIdx[i]; - } - - // Position - sizes[9] = VertexLoader_Position::GetSize(vtxDesc.low.Position, vtxAttr.g0.PosFormat, - vtxAttr.g0.PosElements); - - // Normals - if (vtxDesc.low.Normal != VertexComponentFormat::NotPresent) - { - sizes[10] = VertexLoader_Normal::GetSize(vtxDesc.low.Normal, vtxAttr.g0.NormalFormat, - vtxAttr.g0.NormalElements, vtxAttr.g0.NormalIndex3); - } - else - { - sizes[10] = 0; - } - - // Colors - for (size_t i = 0; i < vtxDesc.low.Color.Size(); i++) - { - int size = 0; - - switch (vtxDesc.low.Color[i]) - { - case VertexComponentFormat::NotPresent: - break; - case VertexComponentFormat::Direct: - switch (colComp[i]) - { - case ColorFormat::RGB565: - size = 2; - break; - case ColorFormat::RGB888: - size = 3; - break; - case ColorFormat::RGB888x: - size = 4; - break; - case ColorFormat::RGBA4444: - size = 2; - break; - case ColorFormat::RGBA6666: - size = 3; - break; - case ColorFormat::RGBA8888: - size = 4; - 
break; - default: - ASSERT(0); - break; - } - break; - case VertexComponentFormat::Index8: - size = 1; - break; - case VertexComponentFormat::Index16: - size = 2; - break; - } - - sizes[11 + i] = size; - } - - // Texture coordinates - for (size_t i = 0; i < tcFormat.size(); i++) - { - sizes[13 + i] = - VertexLoader_TextCoord::GetSize(vtxDesc.high.TexCoord[i], tcFormat[i], tcElements[i]); - } - - return sizes; -} -} // Anonymous namespace - -bool s_DrawingObject; -FifoAnalyzer::CPMemory s_CpMem; - -u32 AnalyzeCommand(const u8* data, DecodeMode mode) -{ - using OpcodeDecoder::Opcode; - const u8* dataStart = data; - - int cmd = ReadFifo8(data); - - switch (static_cast(cmd)) - { - case Opcode::GX_NOP: - case Opcode::GX_CMD_UNKNOWN_METRICS: - case Opcode::GX_CMD_INVL_VC: - break; - - case Opcode::GX_LOAD_CP_REG: - { - s_DrawingObject = false; - - u32 cmd2 = ReadFifo8(data); - u32 value = ReadFifo32(data); - LoadCPReg(cmd2, value, s_CpMem); - break; - } - - case Opcode::GX_LOAD_XF_REG: - { - s_DrawingObject = false; - - u32 cmd2 = ReadFifo32(data); - u8 streamSize = ((cmd2 >> 16) & 15) + 1; - - data += streamSize * 4; - break; - } - - case Opcode::GX_LOAD_INDX_A: - case Opcode::GX_LOAD_INDX_B: - case Opcode::GX_LOAD_INDX_C: - case Opcode::GX_LOAD_INDX_D: - { - s_DrawingObject = false; - - CPArray array = static_cast(0xc + (cmd - static_cast(Opcode::GX_LOAD_INDX_A)) / 8); - u32 value = ReadFifo32(data); - - if (mode == DecodeMode::Record) - FifoRecordAnalyzer::ProcessLoadIndexedXf(array, value); - break; - } - - case Opcode::GX_CMD_CALL_DL: - // The recorder should have expanded display lists into the fifo stream and skipped the call to - // start them - // That is done to make it easier to track where memory is updated - ASSERT(false); - data += 8; - break; - - case Opcode::GX_LOAD_BP_REG: - { - s_DrawingObject = false; - ReadFifo32(data); - break; - } - - default: - if (cmd & 0x80) - { - s_DrawingObject = true; - - const std::array sizes = - 
CalculateVertexElementSizes(cmd & OpcodeDecoder::GX_VAT_MASK, s_CpMem); - - // Determine offset of each element that might be a vertex array - // The first 9 elements are never vertex arrays so we just accumulate their sizes. - int offset = std::accumulate(sizes.begin(), sizes.begin() + 9, 0u); - std::array offsets; - for (size_t i = 0; i < offsets.size(); ++i) - { - offsets[i] = offset; - offset += sizes[i + 9]; - } - - const int vertexSize = offset; - const int numVertices = ReadFifo16(data); - - if (mode == DecodeMode::Record && numVertices > 0) - { - for (size_t i = 0; i < offsets.size(); ++i) - { - FifoRecordAnalyzer::WriteVertexArray(static_cast(i), data + offsets[i], - vertexSize, numVertices); - } - } - - data += numVertices * vertexSize; - } - else - { - PanicAlertFmt("FifoPlayer: Unknown Opcode ({:#x}).\n", cmd); - return 0; - } - break; - } - - return (u32)(data - dataStart); -} - -void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem) -{ - switch (subCmd & CP_COMMAND_MASK) - { - case VCD_LO: - cpMem.vtxDesc.low.Hex = value; - break; - - case VCD_HI: - cpMem.vtxDesc.high.Hex = value; - break; - - case CP_VAT_REG_A: - ASSERT(subCmd - CP_VAT_REG_A < CP_NUM_VAT_REG); - cpMem.vtxAttr[subCmd & CP_VAT_MASK].g0.Hex = value; - break; - - case CP_VAT_REG_B: - ASSERT(subCmd - CP_VAT_REG_B < CP_NUM_VAT_REG); - cpMem.vtxAttr[subCmd & CP_VAT_MASK].g1.Hex = value; - break; - - case CP_VAT_REG_C: - ASSERT(subCmd - CP_VAT_REG_C < CP_NUM_VAT_REG); - cpMem.vtxAttr[subCmd & CP_VAT_MASK].g2.Hex = value; - break; - - case ARRAY_BASE: - cpMem.arrayBases[static_cast(subCmd & CP_ARRAY_MASK)] = value; - break; - - case ARRAY_STRIDE: - cpMem.arrayStrides[static_cast(subCmd & CP_ARRAY_MASK)] = value & 0xFF; - break; - } -} -} // namespace FifoAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoAnalyzer.h deleted file mode 100644 index e9604ce918..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.h +++ /dev/null @@ -1,35 +0,0 
@@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "Common/CommonTypes.h" -#include "Common/EnumMap.h" - -#include "VideoCommon/CPMemory.h" - -namespace FifoAnalyzer -{ -enum class DecodeMode -{ - Record, - Playback, -}; - -u32 AnalyzeCommand(const u8* data, DecodeMode mode); - -struct CPMemory -{ - TVtxDesc vtxDesc; - std::array vtxAttr; - Common::EnumMap arrayBases{}; - Common::EnumMap arrayStrides{}; -}; - -void LoadCPReg(u32 subCmd, u32 value, CPMemory& cpMem); - -extern bool s_DrawingObject; -extern FifoAnalyzer::CPMemory s_CpMem; -} // namespace FifoAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp deleted file mode 100644 index b5dcb5cd8c..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h" - -#include - -#include "Common/Assert.h" -#include "Common/CommonTypes.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoDataFile.h" - -using namespace FifoAnalyzer; - -// For debugging -#define LOG_FIFO_CMDS 0 -struct CmdData -{ - u32 size; - u32 offset; - const u8* ptr; -}; - -void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, - std::vector& frameInfo) -{ - u32* cpMem = file->GetCPMem(); - FifoAnalyzer::LoadCPReg(VCD_LO, cpMem[VCD_LO], s_CpMem); - FifoAnalyzer::LoadCPReg(VCD_HI, cpMem[VCD_HI], s_CpMem); - - for (u32 i = 0; i < CP_NUM_VAT_REG; ++i) - { - FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + i, cpMem[CP_VAT_REG_A + i], s_CpMem); - FifoAnalyzer::LoadCPReg(CP_VAT_REG_B + i, cpMem[CP_VAT_REG_B + i], s_CpMem); - FifoAnalyzer::LoadCPReg(CP_VAT_REG_C + i, cpMem[CP_VAT_REG_C + i], s_CpMem); - } - - frameInfo.clear(); - frameInfo.resize(file->GetFrameCount()); - - for (u32 frameIdx = 0; 
frameIdx < file->GetFrameCount(); ++frameIdx) - { - const FifoFrameInfo& frame = file->GetFrame(frameIdx); - AnalyzedFrameInfo& analyzed = frameInfo[frameIdx]; - - s_DrawingObject = false; - - u32 cmdStart = 0; - - u32 part_start = 0; - FifoAnalyzer::CPMemory cpmem; - -#if LOG_FIFO_CMDS - // Debugging - std::vector prevCmds; -#endif - - while (cmdStart < frame.fifoData.size()) - { - const bool wasDrawing = s_DrawingObject; - const u32 cmdSize = - FifoAnalyzer::AnalyzeCommand(&frame.fifoData[cmdStart], DecodeMode::Playback); - -#if LOG_FIFO_CMDS - CmdData cmdData; - cmdData.offset = cmdStart; - cmdData.ptr = &frame.fifoData[cmdStart]; - cmdData.size = cmdSize; - prevCmds.push_back(cmdData); -#endif - - // Check for error - if (cmdSize == 0) - { - // Clean up frame analysis - analyzed.parts.clear(); - - return; - } - - if (wasDrawing != s_DrawingObject) - { - if (s_DrawingObject) - { - // Start of primitive data for an object - analyzed.AddPart(FramePartType::Commands, part_start, cmdStart, s_CpMem); - part_start = cmdStart; - // Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after - // primitive data, and the first opcode might update cpmem - std::memcpy(&cpmem, &s_CpMem, sizeof(FifoAnalyzer::CPMemory)); - } - else - { - // End of primitive data for an object, and thus end of the object - analyzed.AddPart(FramePartType::PrimitiveData, part_start, cmdStart, cpmem); - part_start = cmdStart; - } - } - - cmdStart += cmdSize; - } - - if (part_start != cmdStart) - { - // Remaining data, usually without any primitives - analyzed.AddPart(FramePartType::Commands, part_start, cmdStart, s_CpMem); - } - } -} diff --git a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h deleted file mode 100644 index 071279c885..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: 
GPL-2.0-or-later - -#pragma once - -#include -#include - -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoDataFile.h" - -enum class FramePartType -{ - Commands, - PrimitiveData, -}; - -struct FramePart -{ - constexpr FramePart(FramePartType type, u32 start, u32 end, const FifoAnalyzer::CPMemory& cpmem) - : m_type(type), m_start(start), m_end(end), m_cpmem(cpmem) - { - } - - const FramePartType m_type; - const u32 m_start; - const u32 m_end; - const FifoAnalyzer::CPMemory m_cpmem; -}; - -struct AnalyzedFrameInfo -{ - std::vector parts; - Common::EnumMap part_type_counts; - - void AddPart(FramePartType type, u32 start, u32 end, const FifoAnalyzer::CPMemory& cpmem) - { - parts.emplace_back(type, start, end, cpmem); - part_type_counts[type]++; - } -}; - -namespace FifoPlaybackAnalyzer -{ -void AnalyzeFrames(FifoDataFile* file, std::vector& frameInfo); -} // namespace FifoPlaybackAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index cae033aa93..bbb08a7ddd 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -4,6 +4,7 @@ #include "Core/FifoPlayer/FifoPlayer.h" #include +#include #include #include "Common/Assert.h" @@ -12,7 +13,6 @@ #include "Core/ConfigManager.h" #include "Core/Core.h" #include "Core/CoreTiming.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" #include "Core/FifoPlayer/FifoDataFile.h" #include "Core/HW/CPU.h" #include "Core/HW/GPFifo.h" @@ -31,6 +31,121 @@ // TODO: Move texMem somewhere else so this isn't an issue. 
#include "VideoCommon/TextureDecoder.h" +namespace +{ +class FifoPlaybackAnalyzer : public OpcodeDecoder::Callback +{ +public: + static void AnalyzeFrames(FifoDataFile* file, std::vector& frame_info); + + explicit FifoPlaybackAnalyzer(const u32* cpmem) : m_cpmem(cpmem) {} + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {} + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); } + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {} + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) {} + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, + const u8* vertex_data)); + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) {} + OPCODE_CALLBACK(void OnNop(u32 count)); + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {} + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)); + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + + bool m_start_of_primitives = false; + bool m_end_of_primitives = false; + // Internal state, copied to above in OnCommand + bool m_was_primitive = false; + bool m_is_primitive = false; + bool m_is_nop = false; + CPState m_cpmem; +}; + +void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, + std::vector& frame_info) +{ + FifoPlaybackAnalyzer analyzer(file->GetCPMem()); + frame_info.clear(); + frame_info.resize(file->GetFrameCount()); + + for (u32 frame_no = 0; frame_no < file->GetFrameCount(); frame_no++) + { + const FifoFrameInfo& frame = file->GetFrame(frame_no); + AnalyzedFrameInfo& analyzed = frame_info[frame_no]; + + u32 offset = 0; + + u32 part_start = 0; + CPState cpmem; + + while (offset < frame.fifoData.size()) + { + const u32 cmd_size = OpcodeDecoder::RunCommand(&frame.fifoData[offset], + u32(frame.fifoData.size()) - offset, analyzer); + + if (analyzer.m_start_of_primitives) + { + // Start of primitive data for an object + 
analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem); + part_start = offset; + // Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after + // primitive data, and the first opcode might update cpmem + std::memcpy(&cpmem, &analyzer.m_cpmem, sizeof(CPState)); + } + if (analyzer.m_end_of_primitives) + { + // End of primitive data for an object, and thus end of the object + analyzed.AddPart(FramePartType::PrimitiveData, part_start, offset, cpmem); + part_start = offset; + } + + offset += cmd_size; + } + + if (part_start != offset) + { + // Remaining data, usually without any primitives + analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem); + } + + ASSERT(offset == frame.fifoData.size()); + } +} + +void FifoPlaybackAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, + const u8* vertex_data) +{ + m_is_primitive = true; +} + +void FifoPlaybackAnalyzer::OnNop(u32 count) +{ + m_is_nop = true; +} + +void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size) +{ + m_start_of_primitives = false; + m_end_of_primitives = false; + + if (!m_is_nop) + { + if (m_is_primitive && !m_was_primitive) + m_start_of_primitives = true; + else if (m_was_primitive && !m_is_primitive) + m_end_of_primitives = true; + + m_was_primitive = m_is_primitive; + } + m_is_primitive = false; + m_is_nop = false; +} +} // namespace + bool IsPlayingBackFifologWithBrokenEFBCopies = false; FifoPlayer::FifoPlayer() : m_Loop{SConfig::GetInstance().bLoopFifoReplay} diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.h b/Source/Core/Core/FifoPlayer/FifoPlayer.h index 96a170810c..ffae2e92d4 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.h +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.h @@ -8,13 +8,14 @@ #include #include +#include "Common/Assert.h" #include "Core/FifoPlayer/FifoDataFile.h" -#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h" #include 
"Core/PowerPC/CPUCoreBase.h" +#include "VideoCommon/CPMemory.h" +#include "VideoCommon/OpcodeDecoding.h" class FifoDataFile; struct MemoryUpdate; -struct AnalyzedFrameInfo; namespace CPU { @@ -51,6 +52,37 @@ enum class State; // Shitty global to fix a shitty problem extern bool IsPlayingBackFifologWithBrokenEFBCopies; +enum class FramePartType +{ + Commands, + PrimitiveData, +}; + +struct FramePart +{ + constexpr FramePart(FramePartType type, u32 start, u32 end, const CPState& cpmem) + : m_type(type), m_start(start), m_end(end), m_cpmem(cpmem) + { + } + + const FramePartType m_type; + const u32 m_start; + const u32 m_end; + const CPState m_cpmem; +}; + +struct AnalyzedFrameInfo +{ + std::vector parts; + Common::EnumMap part_type_counts; + + void AddPart(FramePartType type, u32 start, u32 end, const CPState& cpmem) + { + parts.emplace_back(type, start, end, cpmem); + part_type_counts[type]++; + } +}; + class FifoPlayer { public: @@ -100,7 +132,6 @@ public: private: class CPUCore; - FifoPlayer(); CPU::State AdvanceFrame(); diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp deleted file mode 100644 index 4ada443fcf..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "Core/FifoPlayer/FifoRecordAnalyzer.h" - -#include - -#include "Common/MsgHandler.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoRecorder.h" -#include "Core/HW/Memmap.h" - -using namespace FifoAnalyzer; - -void FifoRecordAnalyzer::Initialize(const u32* cpMem) -{ - s_DrawingObject = false; - - FifoAnalyzer::LoadCPReg(VCD_LO, cpMem[VCD_LO], s_CpMem); - FifoAnalyzer::LoadCPReg(VCD_HI, cpMem[VCD_HI], s_CpMem); - for (u32 i = 0; i < CP_NUM_VAT_REG; ++i) - FifoAnalyzer::LoadCPReg(CP_VAT_REG_A + i, cpMem[CP_VAT_REG_A + i], s_CpMem); - - const u32* const bases_start = 
cpMem + ARRAY_BASE; - const u32* const bases_end = bases_start + s_CpMem.arrayBases.size(); - std::copy(bases_start, bases_end, s_CpMem.arrayBases.begin()); - - const u32* const strides_start = cpMem + ARRAY_STRIDE; - const u32* const strides_end = strides_start + s_CpMem.arrayStrides.size(); - std::copy(strides_start, strides_end, s_CpMem.arrayStrides.begin()); -} - -void FifoRecordAnalyzer::ProcessLoadIndexedXf(CPArray array, u32 val) -{ - int index = val >> 16; - int size = ((val >> 12) & 0xF) + 1; - - u32 address = s_CpMem.arrayBases[array] + s_CpMem.arrayStrides[array] * index; - - FifoRecorder::GetInstance().UseMemory(address, size * 4, MemoryUpdate::XF_DATA); -} - -void FifoRecordAnalyzer::WriteVertexArray(CPArray arrayIndex, const u8* vertexData, int vertexSize, - int numVertices) -{ - // Skip if not indexed array - VertexComponentFormat arrayType; - if (arrayIndex == CPArray::Position) - arrayType = s_CpMem.vtxDesc.low.Position; - else if (arrayIndex == CPArray::Normal) - arrayType = s_CpMem.vtxDesc.low.Normal; - else if (arrayIndex >= CPArray::Color0 && arrayIndex <= CPArray::Color1) - arrayType = s_CpMem.vtxDesc.low.Color[u8(arrayIndex) - u8(CPArray::Color0)]; - else if (arrayIndex >= CPArray::TexCoord0 && arrayIndex <= CPArray::TexCoord7) - arrayType = s_CpMem.vtxDesc.high.TexCoord[u8(arrayIndex) - u8(CPArray::TexCoord0)]; - else - { - PanicAlertFmt("Invalid arrayIndex {}", arrayIndex); - return; - } - - if (!IsIndexed(arrayType)) - return; - - int maxIndex = 0; - - // Determine min and max indices - if (arrayType == VertexComponentFormat::Index8) - { - for (int i = 0; i < numVertices; ++i) - { - int index = *vertexData; - vertexData += vertexSize; - - // 0xff skips the vertex - if (index != 0xff) - { - if (index > maxIndex) - maxIndex = index; - } - } - } - else - { - for (int i = 0; i < numVertices; ++i) - { - int index = Common::swap16(vertexData); - vertexData += vertexSize; - - // 0xffff skips the vertex - if (index != 0xffff) - { - if (index > 
maxIndex) - maxIndex = index; - } - } - } - - u32 arrayStart = s_CpMem.arrayBases[arrayIndex]; - u32 arraySize = s_CpMem.arrayStrides[arrayIndex] * (maxIndex + 1); - - FifoRecorder::GetInstance().UseMemory(arrayStart, arraySize, MemoryUpdate::VERTEX_STREAM); -} diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h deleted file mode 100644 index d1ac21c09b..0000000000 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "Common/CommonTypes.h" - -enum class CPArray : u8; - -namespace FifoRecordAnalyzer -{ -// Must call this before analyzing Fifo commands with FifoAnalyzer::AnalyzeCommand() -void Initialize(const u32* cpMem); - -void ProcessLoadIndexedXf(CPArray array, u32 val); -void WriteVertexArray(CPArray arrayIndex, const u8* vertexData, int vertexSize, int numVertices); -} // namespace FifoRecordAnalyzer diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp index 8c4bf184bc..47cb25d84b 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp @@ -6,13 +6,168 @@ #include #include +#include "Common/Logging/Log.h" #include "Common/MsgHandler.h" #include "Common/Thread.h" + #include "Core/ConfigManager.h" -#include "Core/FifoPlayer/FifoAnalyzer.h" -#include "Core/FifoPlayer/FifoRecordAnalyzer.h" #include "Core/HW/Memmap.h" +#include "VideoCommon/OpcodeDecoding.h" +#include "VideoCommon/XFStructs.h" + +class FifoRecorder::FifoRecordAnalyzer : public OpcodeDecoder::Callback +{ +public: + explicit FifoRecordAnalyzer(FifoRecorder* owner) : m_owner(owner) {} + explicit FifoRecordAnalyzer(FifoRecorder* owner, const u32* cpmem) + : m_owner(owner), m_cpmem(cpmem) + { + } + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {} + OPCODE_CALLBACK(void 
OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); } + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {} + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)); + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, + const u8* vertex_data)); + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) + { + WARN_LOG_FMT(VIDEO, + "Unhandled display list call {:08x} {:08x}; should have been inlined earlier", + address, size); + } + OPCODE_CALLBACK(void OnNop(u32 count)) {} + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {} + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {} + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + +private: + void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type, + u32 component_offset, u32 vertex_size, u16 num_vertices, + const u8* vertex_data); + + FifoRecorder* const m_owner; + CPState m_cpmem; +}; + +void FifoRecorder::FifoRecordAnalyzer::OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) +{ + const u32 load_address = m_cpmem.array_bases[array] + m_cpmem.array_strides[array] * index; + + m_owner->UseMemory(load_address, size * sizeof(u32), MemoryUpdate::XF_DATA); +} + +// TODO: The following code is copied with modifications from VertexLoaderBase. +// Surely there's a better solution? 
+#include "VideoCommon/VertexLoader_Color.h" +#include "VideoCommon/VertexLoader_Normal.h" +#include "VideoCommon/VertexLoader_Position.h" +#include "VideoCommon/VertexLoader_TextCoord.h" + +void FifoRecorder::FifoRecordAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, + u8 vat, u32 vertex_size, u16 num_vertices, + const u8* vertex_data) +{ + const auto& vtx_desc = m_cpmem.vtx_desc; + const auto& vtx_attr = m_cpmem.vtx_attr[vat]; + + u32 offset = 0; + + if (vtx_desc.low.PosMatIdx) + offset++; + for (auto texmtxidx : vtx_desc.low.TexMatIdx) + { + if (texmtxidx) + offset++; + } + const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat, + vtx_attr.g0.PosElements); + ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size, num_vertices, + vertex_data); + offset += pos_size; + + const u32 norm_size = + VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat, + vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3); + ProcessVertexComponent(CPArray::Normal, vtx_desc.low.Position, offset, vertex_size, num_vertices, + vertex_data); + offset += norm_size; + + for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++) + { + const u32 color_size = + VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i)); + ProcessVertexComponent(CPArray::Color0 + i, vtx_desc.low.Position, offset, vertex_size, + num_vertices, vertex_data); + offset += color_size; + } + for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++) + { + const u32 tc_size = VertexLoader_TextCoord::GetSize( + vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i)); + ProcessVertexComponent(CPArray::TexCoord0 + i, vtx_desc.low.Position, offset, vertex_size, + num_vertices, vertex_data); + offset += tc_size; + } + + ASSERT(offset == vertex_size); +} + +// If a component is indexed, the array it indexes into for data must be saved. 
+void FifoRecorder::FifoRecordAnalyzer::ProcessVertexComponent(CPArray array_index, + VertexComponentFormat array_type, + u32 component_offset, u32 vertex_size, + u16 num_vertices, + const u8* vertex_data) +{ + // Skip if not indexed array + if (!IsIndexed(array_type)) + return; + + u16 max_index = 0; + + // Determine min and max indices + if (array_type == VertexComponentFormat::Index8) + { + for (u16 vertex_num = 0; vertex_num < num_vertices; vertex_num++) + { + const u8 index = vertex_data[component_offset]; + vertex_data += vertex_size; + + // 0xff skips the vertex + if (index != 0xff) + { + if (index > max_index) + max_index = index; + } + } + } + else + { + for (u16 vertex_num = 0; vertex_num < num_vertices; vertex_num++) + { + const u16 index = Common::swap16(&vertex_data[component_offset]); + vertex_data += vertex_size; + + // 0xffff skips the vertex + if (index != 0xffff) + { + if (index > max_index) + max_index = index; + } + } + } + + const u32 array_start = m_cpmem.array_bases[array_index]; + const u32 array_size = m_cpmem.array_strides[array_index] * (max_index + 1); + + m_owner->UseMemory(array_start, array_size, MemoryUpdate::VERTEX_STREAM); +} + static FifoRecorder instance; FifoRecorder::FifoRecorder() = default; @@ -76,7 +231,7 @@ void FifoRecorder::WriteGPCommand(const u8* data, u32 size) { // Assumes data contains all information for the command // Calls FifoRecorder::UseMemory - const u32 analyzed_size = FifoAnalyzer::AnalyzeCommand(data, FifoAnalyzer::DecodeMode::Record); + const u32 analyzed_size = OpcodeDecoder::RunCommand(data, size, *m_record_analyzer); // Make sure FifoPlayer's command analyzer agrees about the size of the command. 
if (analyzed_size != size) @@ -211,7 +366,7 @@ void FifoRecorder::SetVideoMemory(const u32* bpMem, const u32* cpMem, const u32* memcpy(m_File->GetTexMem(), texMem, FifoDataFile::TEX_MEM_SIZE); } - FifoRecordAnalyzer::Initialize(cpMem); + m_record_analyzer = std::make_unique(this, cpMem); } bool FifoRecorder::IsRecording() const diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.h b/Source/Core/Core/FifoPlayer/FifoRecorder.h index cbef424561..3a28d05bce 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.h +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.h @@ -8,6 +8,7 @@ #include #include +#include "Common/Assert.h" #include "Core/FifoPlayer/FifoDataFile.h" class FifoRecorder @@ -47,6 +48,8 @@ public: static FifoRecorder& GetInstance(); private: + class FifoRecordAnalyzer; + // Accessed from both GUI and video threads std::recursive_mutex m_mutex; @@ -65,6 +68,7 @@ private: bool m_SkipFutureData = true; bool m_FrameEnded = false; FifoFrameInfo m_CurrentFrame; + std::unique_ptr m_record_analyzer; std::vector m_FifoData; std::vector m_Ram; std::vector m_ExRam; diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 5897714436..49d2cfe994 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -217,11 +217,8 @@ - - - @@ -815,11 +812,8 @@ - - - diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index 223fdd7145..9135d70e09 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -205,23 +205,130 @@ void FIFOAnalyzer::UpdateTree() } } -static std::string GetPrimitiveName(u8 cmd) +namespace { - if ((cmd & 0xC0) != 0x80) +class DetailCallback : public OpcodeDecoder::Callback +{ +public: + explicit DetailCallback(CPState cpmem) : m_cpmem(cpmem) {} + + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { - PanicAlertFmt("Not a primitive command: {:#04x}", cmd); - return ""; + // Note: No need to update m_cpmem as it 
already has the final value for this object + + const auto [name, desc] = GetCPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QStringLiteral("CP %1 %2 %3") + .arg(command, 2, 16, QLatin1Char('0')) + .arg(value, 8, 16, QLatin1Char('0')) + .arg(QString::fromStdString(name)); } - const u8 vat = cmd & OpcodeDecoder::GX_VAT_MASK; // Vertex loader index (0 - 7) - const u8 primitive = - (cmd & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT; - return fmt::format("{} VAT {}", static_cast(primitive), vat); -} + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) + { + const auto [name, desc] = GetXFTransferInfo(address, count, data); + ASSERT(!name.empty()); + + const u32 command = address | (count << 16); + + text = QStringLiteral("XF %1 ").arg(command, 8, 16, QLatin1Char('0')); + + for (u8 i = 0; i < count; i++) + { + const u32 value = Common::swap32(&data[i * 4]); + + text += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0')); + } + + text += QStringLiteral(" ") + QString::fromStdString(name); + } + + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) + { + const auto [name, desc] = GetBPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QStringLiteral("BP %1 %2 %3") + .arg(command, 2, 16, QLatin1Char('0')) + .arg(value, 6, 16, QLatin1Char('0')) + .arg(QString::fromStdString(name)); + } + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) + { + const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size); + text = QStringLiteral("LOAD INDX %1 %2") + .arg(QString::fromStdString(fmt::to_string(array))) + .arg(QString::fromStdString(desc)); + } + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, const u8* vertex_data)) + { + const auto name = fmt::to_string(primitive); + + // Note that vertex_count is allowed to be 0, with no special treatment + // (another command just comes right 
after the current command, with no vertices in between) + const u32 object_prim_size = num_vertices * vertex_size; + + const u8 opcode = + 0x80 | (static_cast(primitive) << OpcodeDecoder::GX_PRIMITIVE_SHIFT) | vat; + text = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes") + .arg(QString::fromStdString(name)) + .arg(opcode, 2, 16, QLatin1Char('0')) + .arg(num_vertices) + .arg(vertex_size) + .arg(object_prim_size); + + // It's not really useful to have a massive unreadable hex string for the object primitives. + // Put it in the description instead. + +// #define INCLUDE_HEX_IN_PRIMITIVES +#ifdef INCLUDE_HEX_IN_PRIMITIVES + text += QStringLiteral(" "); + for (u32 i = 0; i < object_prim_size; i++) + { + text += QStringLiteral("%1").arg(vertex_data[i], 2, 16, QLatin1Char('0')); + } +#endif + } + + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) + { + text = QObject::tr("Call display list at %1 with size %2") + .arg(address, 8, 16, QLatin1Char('0')) + .arg(size, 8, 16, QLatin1Char('0')); + } + + OPCODE_CALLBACK(void OnNop(u32 count)) + { + if (count > 1) + text = QStringLiteral("NOP (%1x)").arg(count); + else + text = QStringLiteral("NOP"); + } + + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) + { + using OpcodeDecoder::Opcode; + if (static_cast(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS) + text = QStringLiteral("GX_CMD_UNKNOWN_METRICS"); + else if (static_cast(opcode) == Opcode::GX_CMD_INVL_VC) + text = QStringLiteral("GX_CMD_INVL_VC"); + else + text = QStringLiteral("Unknown opcode %1").arg(opcode, 2, 16); + } + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {} + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + + QString text; + CPState m_cpmem; +}; +} // namespace void FIFOAnalyzer::UpdateDetails() { - using OpcodeDecoder::Opcode; - // Clearing the detail list can update the selection, which causes UpdateDescription to be called // immediately. 
However, the object data offsets have not been recalculated yet, which can cause // the wrong data to be used, potentially leading to out of bounds data or other bad things. @@ -252,188 +359,23 @@ void FIFOAnalyzer::UpdateDetails() const u32 object_end = frame_info.parts[end_part_nr].m_end; const u32 object_size = object_end - object_start; - const u8* const object = &fifo_frame.fifoData[object_start]; - u32 object_offset = 0; + // NOTE: object_info.m_cpmem is the state of cpmem _after_ all of the commands in this object. + // However, it doesn't matter that it doesn't match the start, since it will match by the time + // primitives are reached. + auto callback = DetailCallback(frame_info.parts[end_part_nr].m_cpmem); + while (object_offset < object_size) { - QString new_label; const u32 start_offset = object_offset; m_object_data_offsets.push_back(start_offset); - const Opcode opcode = static_cast(object[object_offset++]); - switch (opcode) - { - case Opcode::GX_NOP: - if (object[object_offset] == static_cast(Opcode::GX_NOP)) - { - u32 nop_count = 2; - while (object[++object_offset] == static_cast(Opcode::GX_NOP)) - nop_count++; + object_offset += OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + start_offset], + object_size - start_offset, callback); - new_label = QStringLiteral("NOP (%1x)").arg(nop_count); - } - else - { - new_label = QStringLiteral("NOP"); - } - break; - - case Opcode::GX_CMD_UNKNOWN_METRICS: - new_label = QStringLiteral("GX_CMD_UNKNOWN_METRICS"); - break; - - case Opcode::GX_CMD_INVL_VC: - new_label = QStringLiteral("GX_CMD_INVL_VC"); - break; - - case Opcode::GX_LOAD_CP_REG: - { - const u8 cmd2 = object[object_offset++]; - const u32 value = Common::swap32(&object[object_offset]); - object_offset += 4; - - const auto [name, desc] = GetCPRegInfo(cmd2, value); - ASSERT(!name.empty()); - - new_label = QStringLiteral("CP %1 %2 %3") - .arg(cmd2, 2, 16, QLatin1Char('0')) - .arg(value, 8, 16, QLatin1Char('0')) - 
.arg(QString::fromStdString(name)); - } - break; - - case Opcode::GX_LOAD_XF_REG: - { - const auto [name, desc] = GetXFTransferInfo(&object[object_offset]); - const u32 cmd2 = Common::swap32(&object[object_offset]); - object_offset += 4; - ASSERT(!name.empty()); - - const u8 stream_size = ((cmd2 >> 16) & 15) + 1; - - new_label = QStringLiteral("XF %1 ").arg(cmd2, 8, 16, QLatin1Char('0')); - - for (u8 i = 0; i < stream_size; i++) - { - const u32 value = Common::swap32(&object[object_offset]); - object_offset += 4; - - new_label += QStringLiteral("%1 ").arg(value, 8, 16, QLatin1Char('0')); - } - - new_label += QStringLiteral(" ") + QString::fromStdString(name); - } - break; - - case Opcode::GX_LOAD_INDX_A: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(CPArray::XF_A, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX A %1").arg(QString::fromStdString(desc)); - } - break; - case Opcode::GX_LOAD_INDX_B: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(CPArray::XF_B, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX B %1").arg(QString::fromStdString(desc)); - } - break; - case Opcode::GX_LOAD_INDX_C: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(CPArray::XF_C, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX C %1").arg(QString::fromStdString(desc)); - } - break; - case Opcode::GX_LOAD_INDX_D: - { - const auto [desc, written] = - GetXFIndexedLoadInfo(CPArray::XF_D, Common::swap32(&object[object_offset])); - object_offset += 4; - new_label = QStringLiteral("LOAD INDX D %1").arg(QString::fromStdString(desc)); - } - break; - - case Opcode::GX_CMD_CALL_DL: - // The recorder should have expanded display lists into the fifo stream and skipped the - // call to start them - // That is done to make it easier to track where memory is updated - ASSERT(false); - object_offset += 8; - new_label = 
QStringLiteral("CALL DL"); - break; - - case Opcode::GX_LOAD_BP_REG: - { - const u8 cmd2 = object[object_offset++]; - const u32 cmddata = Common::swap24(&object[object_offset]); - object_offset += 3; - - const auto [name, desc] = GetBPRegInfo(cmd2, cmddata); - ASSERT(!name.empty()); - - new_label = QStringLiteral("BP %1 %2 %3") - .arg(cmd2, 2, 16, QLatin1Char('0')) - .arg(cmddata, 6, 16, QLatin1Char('0')) - .arg(QString::fromStdString(name)); - } - break; - - default: - { - const u8 command = static_cast(opcode); - if ((command & 0xC0) == 0x80) - { - // Object primitive data - const u8 vat = command & OpcodeDecoder::GX_VAT_MASK; - const auto& vtx_desc = frame_info.parts[end_part_nr].m_cpmem.vtxDesc; - const auto& vtx_attr = frame_info.parts[end_part_nr].m_cpmem.vtxAttr[vat]; - - const auto name = GetPrimitiveName(command); - - const u16 vertex_count = Common::swap16(&object[object_offset]); - object_offset += 2; - const u32 vertex_size = VertexLoaderBase::GetVertexSize(vtx_desc, vtx_attr); - - // Note that vertex_count is allowed to be 0, with no special treatment - // (another command just comes right after the current command, with no vertices in between) - const u32 object_prim_size = vertex_count * vertex_size; - - new_label = QStringLiteral("PRIMITIVE %1 (%2) %3 vertices %4 bytes/vertex %5 total bytes") - .arg(QString::fromStdString(name)) - .arg(command, 2, 16, QLatin1Char('0')) - .arg(vertex_count) - .arg(vertex_size) - .arg(object_prim_size); - - // It's not really useful to have a massive unreadable hex string for the object primitives. - // Put it in the description instead. 
- -// #define INCLUDE_HEX_IN_PRIMITIVES -#ifdef INCLUDE_HEX_IN_PRIMITIVES - new_label += QStringLiteral(" "); - for (u32 i = 0; i < object_prim_size; i++) - { - new_label += QStringLiteral("%1").arg(object[object_offset++], 2, 16, QLatin1Char('0')); - } -#else - object_offset += object_prim_size; -#endif - } - else - { - new_label = QStringLiteral("Unknown opcode %1").arg(command, 2, 16); - } - break; - } - } - new_label = QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) + - new_label; + QString new_label = + QStringLiteral("%1: ").arg(object_start + start_offset, 8, 16, QLatin1Char('0')) + + callback.text; m_detail_list->addItem(new_label); } @@ -580,10 +522,143 @@ void FIFOAnalyzer::ShowSearchResult(size_t index) m_search_previous->setEnabled(index > 0); } +namespace +{ +// TODO: Not sure whether we should bother translating the descriptions +class DescriptionCallback : public OpcodeDecoder::Callback +{ +public: + explicit DescriptionCallback(const CPState& cpmem) : m_cpmem(cpmem) {} + + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) + { + const auto [name, desc] = GetBPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QObject::tr("BP register "); + text += QString::fromStdString(name); + text += QLatin1Char{'\n'}; + + if (desc.empty()) + text += QObject::tr("No description available"); + else + text += QString::fromStdString(desc); + } + + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) + { + // Note: No need to update m_cpmem as it already has the final value for this object + + const auto [name, desc] = GetCPRegInfo(command, value); + ASSERT(!name.empty()); + + text = QObject::tr("CP register "); + text += QString::fromStdString(name); + text += QLatin1Char{'\n'}; + + if (desc.empty()) + text += QObject::tr("No description available"); + else + text += QString::fromStdString(desc); + } + + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) + { + const auto [name, desc] = GetXFTransferInfo(address, 
count, data); + ASSERT(!name.empty()); + + text = QObject::tr("XF register "); + text += QString::fromStdString(name); + text += QLatin1Char{'\n'}; + + if (desc.empty()) + text += QObject::tr("No description available"); + else + text += QString::fromStdString(desc); + } + + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) + { + const auto [desc, written] = GetXFIndexedLoadInfo(array, index, address, size); + + text = QString::fromStdString(desc); + text += QLatin1Char{'\n'}; + switch (array) + { + case CPArray::XF_A: + text += QObject::tr("Usually used for position matrices"); + break; + case CPArray::XF_B: + // i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal" + // does not have its usual meaning here, but rather the meaning of "perpendicular to a + // surface". + text += QObject::tr("Usually used for normal matrices"); + break; + case CPArray::XF_C: + // i18n: Tex coord is short for texture coordinate + text += QObject::tr("Usually used for tex coord matrices"); + break; + case CPArray::XF_D: + text += QObject::tr("Usually used for light objects"); + break; + default: + break; + } + text += QLatin1Char{'\n'}; + text += QString::fromStdString(written); + } + + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, const u8* vertex_data)) + { + const auto name = fmt::format("{} VAT {}", primitive, vat); + + // i18n: In this context, a primitive means a point, line, triangle or rectangle. + // Do not translate the word primitive as if it was an adjective. 
+ text = QObject::tr("Primitive %1").arg(QString::fromStdString(name)); + text += QLatin1Char{'\n'}; + + const auto& vtx_desc = m_cpmem.vtx_desc; + const auto& vtx_attr = m_cpmem.vtx_attr[vat]; + const auto component_sizes = VertexLoaderBase::GetVertexComponentSizes(vtx_desc, vtx_attr); + + u32 i = 0; + for (u32 vertex_num = 0; vertex_num < num_vertices; vertex_num++) + { + text += QLatin1Char{'\n'}; + for (u32 comp_size : component_sizes) + { + for (u32 comp_off = 0; comp_off < comp_size; comp_off++) + { + text += QStringLiteral("%1").arg(vertex_data[i++], 2, 16, QLatin1Char('0')); + } + text += QLatin1Char{' '}; + } + } + } + + OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) + { + text = QObject::tr("No description available"); + } + + OPCODE_CALLBACK(void OnNop(u32 count)) { text = QObject::tr("No description available"); } + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) + { + text = QObject::tr("No description available"); + } + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {} + + OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + + QString text; + CPState m_cpmem; +}; +} // namespace + void FIFOAnalyzer::UpdateDescription() { - using OpcodeDecoder::Opcode; - m_entry_detail_browser->clear(); if (!FifoPlayer::GetInstance().IsPlaying()) @@ -606,138 +681,12 @@ void FIFOAnalyzer::UpdateDescription() const FifoFrameInfo& fifo_frame = FifoPlayer::GetInstance().GetFile()->GetFrame(frame_nr); const u32 object_start = frame_info.parts[start_part_nr].m_start; + const u32 object_end = frame_info.parts[end_part_nr].m_end; + const u32 object_size = object_end - object_start; const u32 entry_start = m_object_data_offsets[entry_nr]; - const u8* cmddata = &fifo_frame.fifoData[object_start + entry_start]; - const Opcode opcode = static_cast(*cmddata); - - // TODO: Not sure whether we should bother translating the descriptions - - QString text; - if (opcode == Opcode::GX_LOAD_BP_REG) - { - const u8 cmd = *(cmddata + 1); - const u32 
value = Common::swap24(cmddata + 2); - - const auto [name, desc] = GetBPRegInfo(cmd, value); - ASSERT(!name.empty()); - - text = tr("BP register "); - text += QString::fromStdString(name); - text += QLatin1Char{'\n'}; - - if (desc.empty()) - text += tr("No description available"); - else - text += QString::fromStdString(desc); - } - else if (opcode == Opcode::GX_LOAD_CP_REG) - { - const u8 cmd = *(cmddata + 1); - const u32 value = Common::swap32(cmddata + 2); - - const auto [name, desc] = GetCPRegInfo(cmd, value); - ASSERT(!name.empty()); - - text = tr("CP register "); - text += QString::fromStdString(name); - text += QLatin1Char{'\n'}; - - if (desc.empty()) - text += tr("No description available"); - else - text += QString::fromStdString(desc); - } - else if (opcode == Opcode::GX_LOAD_XF_REG) - { - const auto [name, desc] = GetXFTransferInfo(cmddata + 1); - ASSERT(!name.empty()); - - text = tr("XF register "); - text += QString::fromStdString(name); - text += QLatin1Char{'\n'}; - - if (desc.empty()) - text += tr("No description available"); - else - text += QString::fromStdString(desc); - } - else if (opcode == Opcode::GX_LOAD_INDX_A) - { - const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_A, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - text += tr("Usually used for position matrices"); - text += QLatin1Char{'\n'}; - text += QString::fromStdString(written); - } - else if (opcode == Opcode::GX_LOAD_INDX_B) - { - const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_B, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - // i18n: A normal matrix is a matrix used for transforming normal vectors. The word "normal" - // does not have its usual meaning here, but rather the meaning of "perpendicular to a surface". 
- text += tr("Usually used for normal matrices"); - text += QLatin1Char{'\n'}; - text += QString::fromStdString(written); - } - else if (opcode == Opcode::GX_LOAD_INDX_C) - { - const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_C, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - // i18n: Tex coord is short for texture coordinate - text += tr("Usually used for tex coord matrices"); - text += QLatin1Char{'\n'}; - text += QString::fromStdString(written); - } - else if (opcode == Opcode::GX_LOAD_INDX_D) - { - const auto [desc, written] = GetXFIndexedLoadInfo(CPArray::XF_D, Common::swap32(cmddata + 1)); - - text = QString::fromStdString(desc); - text += QLatin1Char{'\n'}; - text += tr("Usually used for light objects"); - text += QLatin1Char{'\n'}; - text += QString::fromStdString(written); - } - else if ((*cmddata & 0xC0) == 0x80) - { - const u8 vat = *cmddata & OpcodeDecoder::GX_VAT_MASK; - const QString name = QString::fromStdString(GetPrimitiveName(*cmddata)); - const u16 vertex_count = Common::swap16(cmddata + 1); - - // i18n: In this context, a primitive means a point, line, triangle or rectangle. - // Do not translate the word primitive as if it was an adjective. 
- text = tr("Primitive %1").arg(name); - text += QLatin1Char{'\n'}; - - const auto& vtx_desc = frame_info.parts[end_part_nr].m_cpmem.vtxDesc; - const auto& vtx_attr = frame_info.parts[end_part_nr].m_cpmem.vtxAttr[vat]; - const auto component_sizes = VertexLoaderBase::GetVertexComponentSizes(vtx_desc, vtx_attr); - - u32 i = 3; - for (u32 vertex_num = 0; vertex_num < vertex_count; vertex_num++) - { - text += QLatin1Char{'\n'}; - for (u32 comp_size : component_sizes) - { - for (u32 comp_off = 0; comp_off < comp_size; comp_off++) - { - text += QStringLiteral("%1").arg(cmddata[i++], 2, 16, QLatin1Char('0')); - } - text += QLatin1Char{' '}; - } - } - } - else - { - text = tr("No description available"); - } - - m_entry_detail_browser->setText(text); + auto callback = DescriptionCallback(frame_info.parts[end_part_nr].m_cpmem); + OpcodeDecoder::RunCommand(&fifo_frame.fifoData[object_start + entry_start], + object_size - entry_start, callback); + m_entry_detail_browser->setText(callback.text); } diff --git a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp index cdcad8eeb2..253017a952 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp @@ -21,7 +21,6 @@ #include "Core/Core.h" #include "Core/FifoPlayer/FifoDataFile.h" -#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h" #include "Core/FifoPlayer/FifoPlayer.h" #include "Core/FifoPlayer/FifoRecorder.h" diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index fdd32746a9..97dc5993bd 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -2205,7 +2205,7 @@ struct BPMemory extern BPMemory bpmem; -void LoadBPReg(u32 value0, int cycles_into_future); -void LoadBPRegPreprocess(u32 value0, int cycles_into_future); +void LoadBPReg(u8 reg, u32 value, int cycles_into_future); +void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future); std::pair 
GetBPRegInfo(u8 cmd, u32 cmddata); diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 0fc4ca6785..503ef6154f 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -716,29 +716,27 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) bp.newvalue); } -// Call browser: OpcodeDecoding.cpp ExecuteDisplayList > Decode() > LoadBPReg() -void LoadBPReg(u32 value0, int cycles_into_future) +// Call browser: OpcodeDecoding.cpp RunCallback::OnBP() +void LoadBPReg(u8 reg, u32 value, int cycles_into_future) { - int regNum = value0 >> 24; - int oldval = ((u32*)&bpmem)[regNum]; - int newval = (oldval & ~bpmem.bpMask) | (value0 & bpmem.bpMask); + int oldval = ((u32*)&bpmem)[reg]; + int newval = (oldval & ~bpmem.bpMask) | (value & bpmem.bpMask); int changes = (oldval ^ newval) & 0xFFFFFF; - BPCmd bp = {regNum, changes, newval}; + BPCmd bp = {reg, changes, newval}; // Reset the mask register if we're not trying to set it ourselves. 
- if (regNum != BPMEM_BP_MASK) + if (reg != BPMEM_BP_MASK) bpmem.bpMask = 0xFFFFFF; BPWritten(bp, cycles_into_future); } -void LoadBPRegPreprocess(u32 value0, int cycles_into_future) +void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future) { - int regNum = value0 >> 24; - // masking could hypothetically be a problem - u32 newval = value0 & 0xffffff; - switch (regNum) + // masking via BPMEM_BP_MASK could hypothetically be a problem + u32 newval = value & 0xffffff; + switch (reg) { case BPMEM_SETDRAWDONE: if ((newval & 0xff) == 0x02) diff --git a/Source/Core/VideoCommon/CPMemory.cpp b/Source/Core/VideoCommon/CPMemory.cpp index afa354b4e4..4781595e8c 100644 --- a/Source/Core/VideoCommon/CPMemory.cpp +++ b/Source/Core/VideoCommon/CPMemory.cpp @@ -2,7 +2,13 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "VideoCommon/CPMemory.h" + +#include + #include "Common/ChunkFile.h" +#include "Common/Logging/Log.h" +#include "Core/DolphinAnalytics.h" +#include "VideoCommon/CommandProcessor.h" // CP state CPState g_main_cp_state; @@ -28,7 +34,7 @@ void DoCPState(PointerWrap& p) void CopyPreprocessCPStateFromMain() { - memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState)); + std::memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState)); } std::pair GetCPRegInfo(u8 cmd, u32 value) @@ -73,3 +79,164 @@ std::pair GetCPRegInfo(u8 cmd, u32 value) return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), ""); } } + +CPState::CPState(const u32* memory) : CPState() +{ + matrix_index_a.Hex = memory[MATINDEX_A]; + matrix_index_b.Hex = memory[MATINDEX_B]; + vtx_desc.low.Hex = memory[VCD_LO]; + vtx_desc.high.Hex = memory[VCD_HI]; + + for (u32 i = 0; i < CP_NUM_VAT_REG; i++) + { + vtx_attr[i].g0.Hex = memory[CP_VAT_REG_A + i]; + vtx_attr[i].g1.Hex = memory[CP_VAT_REG_B + i]; + vtx_attr[i].g2.Hex = memory[CP_VAT_REG_C + i]; + } + + for (u32 i = 0; i < CP_NUM_ARRAYS; i++) + { + array_bases[static_cast(i)] = memory[ARRAY_BASE + 
i]; + array_strides[static_cast(i)] = memory[ARRAY_STRIDE + i]; + } +} + +void CPState::LoadCPReg(u8 sub_cmd, u32 value) +{ + switch (sub_cmd & CP_COMMAND_MASK) + { + case UNKNOWN_00: + case UNKNOWN_10: + case UNKNOWN_20: + if (!(sub_cmd == UNKNOWN_20 && value == 0)) + { + // All titles using libogc or the official SDK issue 0x20 with value=0 on startup + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND); + DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}", + sub_cmd); + } + break; + + case MATINDEX_A: + if (sub_cmd != MATINDEX_A) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP MATINDEX_A: an exact value of {:02x} was expected " + "but instead a value of {:02x} was seen", + MATINDEX_A, sub_cmd); + } + + matrix_index_a.Hex = value; + break; + + case MATINDEX_B: + if (sub_cmd != MATINDEX_B) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP MATINDEX_B: an exact value of {:02x} was expected " + "but instead a value of {:02x} was seen", + MATINDEX_B, sub_cmd); + } + + matrix_index_b.Hex = value; + break; + + case VCD_LO: + if (sub_cmd != VCD_LO) // Stricter than YAGCD + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP VCD_LO: an exact value of {:02x} was expected " + "but instead a value of {:02x} was seen", + VCD_LO, sub_cmd); + } + + vtx_desc.low.Hex = value; + attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG); + bases_dirty = true; + break; + + case VCD_HI: + if (sub_cmd != VCD_HI) // Stricter than YAGCD + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, + "CP VCD_HI: an exact value of {:02x} was expected " + "but instead a value of {:02x} was seen", + VCD_HI, sub_cmd); + } + + vtx_desc.high.Hex = value; + attr_dirty = 
BitSet32::AllTrue(CP_NUM_VAT_REG); + bases_dirty = true; + break; + + case CP_VAT_REG_A: + if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A); + } + vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value; + attr_dirty[sub_cmd & CP_VAT_MASK] = true; + break; + + case CP_VAT_REG_B: + if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B); + } + vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value; + attr_dirty[sub_cmd & CP_VAT_MASK] = true; + break; + + case CP_VAT_REG_C: + if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG) + { + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C); + } + vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value; + attr_dirty[sub_cmd & CP_VAT_MASK] = true; + break; + + // Pointers to vertex arrays in GC RAM + case ARRAY_BASE: + array_bases[static_cast(sub_cmd & CP_ARRAY_MASK)] = + value & CommandProcessor::GetPhysicalAddressMask(); + bases_dirty = true; + break; + + case ARRAY_STRIDE: + array_strides[static_cast(sub_cmd & CP_ARRAY_MASK)] = value & 0xFF; + break; + + default: + DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND); + WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value); + } +} + +void CPState::FillCPMemoryArray(u32* memory) const +{ + memory[MATINDEX_A] = matrix_index_a.Hex; + memory[MATINDEX_B] = matrix_index_b.Hex; + memory[VCD_LO] = vtx_desc.low.Hex; + memory[VCD_HI] = vtx_desc.high.Hex; + + for (int i = 0; i < CP_NUM_VAT_REG; ++i) + { + memory[CP_VAT_REG_A + i] = vtx_attr[i].g0.Hex; + memory[CP_VAT_REG_B + i] = vtx_attr[i].g1.Hex; + 
memory[CP_VAT_REG_C + i] = vtx_attr[i].g2.Hex; + } + + for (int i = 0; i < CP_NUM_ARRAYS; ++i) + { + memory[ARRAY_BASE + i] = array_bases[static_cast(i)]; + memory[ARRAY_STRIDE + i] = array_strides[static_cast(i)]; + } +} diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 1a937d15c9..defac506b2 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "Common/BitField.h" @@ -630,13 +631,21 @@ class VertexLoaderBase; // STATE_TO_SAVE struct CPState final { + CPState() = default; + explicit CPState(const u32* memory); + + // Mutates the CP state based on the given command and value. + void LoadCPReg(u8 sub_cmd, u32 value); + // Fills memory with data from CP regs. There should be space for 0x100 values in memory. + void FillCPMemoryArray(u32* memory) const; + Common::EnumMap array_bases; Common::EnumMap array_strides; TMatrixIndexA matrix_index_a{}; TMatrixIndexB matrix_index_b{}; TVtxDesc vtx_desc; // Most games only use the first VtxAttr and simply reconfigure it all the time as needed. - VAT vtx_attr[CP_NUM_VAT_REG]{}; + std::array vtx_attr{}; // Attributes that actually belong to VertexLoaderManager: BitSet32 attr_dirty{}; @@ -644,18 +653,13 @@ struct CPState final VertexLoaderBase* vertex_loaders[CP_NUM_VAT_REG]{}; int last_id = 0; }; +static_assert(std::is_trivially_copyable_v); class PointerWrap; extern CPState g_main_cp_state; extern CPState g_preprocess_cp_state; -// Might move this into its own file later. 
-void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false); - -// Fills memory with data from CP regs -void FillCPMemoryArray(u32* memory); - void DoCPState(PointerWrap& p); void CopyPreprocessCPStateFromMain(); diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 83784a137f..ddaa0e72a3 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "Common/Assert.h" #include "Common/ChunkFile.h" @@ -607,10 +608,10 @@ void SetCpClearRegister() { } -void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess) +void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess) { // TODO(Omega): Maybe dump FIFO to file on this error - PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, {2}).\n" + PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, preprocess={2}).\n" "This means one of the following:\n" "* The emulated GPU got desynced, disabling dual core can help\n" "* Command stream corrupted by some spurious memory bug\n" @@ -618,7 +619,7 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess) "* Some other sort of bug\n\n" "Further errors will be sent to the Video Backend log and\n" "Dolphin will now likely crash or hang. Enjoy.", - cmd_byte, buffer, preprocess ? 
"preprocess=true" : "preprocess=false"); + cmd_byte, fmt::ptr(buffer), preprocess); { PanicAlertFmt("Illegal command {:02x}\n" diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index 4ca73c71d2..2da7f1c84d 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -169,7 +169,7 @@ void SetCpClearRegister(); void SetCpControlRegister(); void SetCpStatusRegister(); -void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess); +void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess); u32 GetPhysicalAddressMask(); diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 04fc00d33a..f96c71b550 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -273,8 +273,8 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) } } Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); - s_video_buffer_pp_read_ptr = OpcodeDecoder::Run( - DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false); + s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo( + DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr); // This would have to be locked if the GPU thread didn't spin. 
s_video_buffer_write_ptr = write_ptr + len; } @@ -316,7 +316,7 @@ void RunGpuLoop() if (write_ptr > seen_ptr) { s_video_buffer_read_ptr = - OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); + OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr); s_video_buffer_seen_ptr = write_ptr; } } @@ -349,8 +349,8 @@ void RunGpuLoop() fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32); u8* write_ptr = s_video_buffer_write_ptr; - s_video_buffer_read_ptr = OpcodeDecoder::Run( - DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); + s_video_buffer_read_ptr = OpcodeDecoder::RunFifo( + DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted); fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed); fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst); @@ -466,8 +466,8 @@ static int RunGpuOnCpu(int ticks) } ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed)); u32 cycles = 0; - s_video_buffer_read_ptr = OpcodeDecoder::Run( - DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); + s_video_buffer_read_ptr = OpcodeDecoder::RunFifo( + DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles); available_ticks -= cycles; } diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index be879ddfbc..239aec158a 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -14,7 +14,7 @@ #include "VideoCommon/OpcodeDecoding.h" -#include "Common/CommonTypes.h" +#include "Common/Assert.h" #include "Common/Logging/Log.h" #include "Core/FifoPlayer/FifoRecorder.h" #include "Core/HW/Memmap.h" @@ -24,55 +24,15 @@ #include "VideoCommon/DataReader.h" #include "VideoCommon/Fifo.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderManager.h" +#include 
"VideoCommon/VertexShaderManager.h" #include "VideoCommon/XFMemory.h" +#include "VideoCommon/XFStructs.h" namespace OpcodeDecoder { -namespace -{ bool s_is_fifo_error_seen = false; - -u32 InterpretDisplayList(u32 address, u32 size) -{ - u8* start_address; - - if (Fifo::UseDeterministicGPUThread()) - start_address = static_cast(Fifo::PopFifoAuxBuffer(size)); - else - start_address = Memory::GetPointer(address); - - u32 cycles = 0; - - // Avoid the crash if Memory::GetPointer failed .. - if (start_address != nullptr) - { - // temporarily swap dl and non-dl (small "hack" for the stats) - g_stats.SwapDL(); - - Run(DataReader(start_address, start_address + size), &cycles, true); - INCSTAT(g_stats.this_frame.num_dlists_called); - - // un-swap - g_stats.SwapDL(); - } - - return cycles; -} - -void InterpretDisplayListPreprocess(u32 address, u32 size) -{ - u8* const start_address = Memory::GetPointer(address); - - Fifo::PushFifoAuxBuffer(start_address, size); - - if (start_address == nullptr) - return; - - Run(DataReader(start_address, start_address + size), nullptr, true); -} -} // Anonymous namespace - bool g_record_fifo_data = false; void Init() @@ -81,203 +41,205 @@ void Init() } template -u8* Run(DataReader src, u32* cycles, bool in_display_list) +class RunCallback final : public Callback { - u32 total_cycles = 0; - u8* opcode_start = nullptr; - - const auto finish_up = [cycles, &opcode_start, &total_cycles] { - if (cycles != nullptr) - { - *cycles = total_cycles; - } - return opcode_start; - }; - - while (true) +public: + OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) { - opcode_start = src.GetPointer(); + m_cycles += 18 + 6 * count; - if (!src.size()) - return finish_up(); - - const u8 cmd_byte = src.Read(); - switch (static_cast(cmd_byte)) - { - case Opcode::GX_NOP: - total_cycles += 6; // Hm, this means that we scan over nop streams pretty slowly... 
- break; - - case Opcode::GX_UNKNOWN_RESET: - total_cycles += 6; // Datel software uses this command - DEBUG_LOG_FMT(VIDEO, "GX Reset?: {:08x}", cmd_byte); - break; - - case Opcode::GX_LOAD_CP_REG: - { - if (src.size() < 1 + 4) - return finish_up(); - - total_cycles += 12; - - const u8 sub_cmd = src.Read(); - const u32 value = src.Read(); - LoadCPReg(sub_cmd, value, is_preprocess); - if constexpr (!is_preprocess) - INCSTAT(g_stats.this_frame.num_cp_loads); - } - break; - - case Opcode::GX_LOAD_XF_REG: - { - if (src.size() < 4) - return finish_up(); - - const u32 cmd2 = src.Read(); - const u32 transfer_size = ((cmd2 >> 16) & 15) + 1; - if (src.size() < transfer_size * sizeof(u32)) - return finish_up(); - - total_cycles += 18 + 6 * transfer_size; - - if constexpr (!is_preprocess) - { - const u32 xf_address = cmd2 & 0xFFFF; - LoadXFReg(transfer_size, xf_address, src); - - INCSTAT(g_stats.this_frame.num_xf_loads); - } - src.Skip(transfer_size); - } - break; - - case Opcode::GX_LOAD_INDX_A: // Used for position matrices - case Opcode::GX_LOAD_INDX_B: // Used for normal matrices - case Opcode::GX_LOAD_INDX_C: // Used for postmatrices - case Opcode::GX_LOAD_INDX_D: // Used for lights - { - if (src.size() < 4) - return finish_up(); - - total_cycles += 6; - - // Map the command byte to its ref array. 
- // GX_LOAD_INDX_A (32) -> 0xC - // GX_LOAD_INDX_B (40) -> 0xD - // GX_LOAD_INDX_C (48) -> 0xE - // GX_LOAD_INDX_D (56) -> 0xF - const auto array = static_cast((cmd_byte / 8) + 8); - - if constexpr (is_preprocess) - PreprocessIndexedXF(array, src.Read()); - else - LoadIndexedXF(array, src.Read()); - } - break; - - case Opcode::GX_CMD_CALL_DL: - { - if (src.size() < 8) - return finish_up(); - - const u32 address = src.Read(); - const u32 count = src.Read(); - - if (in_display_list) - { - total_cycles += 6; - INFO_LOG_FMT(VIDEO, "recursive display list detected"); - } - else - { - if constexpr (is_preprocess) - InterpretDisplayListPreprocess(address, count); - else - total_cycles += 6 + InterpretDisplayList(address, count); - } - } - break; - - case Opcode::GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics - // registers after that - total_cycles += 6; - DEBUG_LOG_FMT(VIDEO, "GX 0x44: {:08x}", cmd_byte); - break; - - case Opcode::GX_CMD_INVL_VC: // Invalidate Vertex Cache - total_cycles += 6; - DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)"); - break; - - case Opcode::GX_LOAD_BP_REG: - // In skipped_frame case: We have to let BP writes through because they set - // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. 
- { - if (src.size() < 4) - return finish_up(); - - total_cycles += 12; - - const u32 bp_cmd = src.Read(); - if constexpr (is_preprocess) - { - LoadBPRegPreprocess(bp_cmd, total_cycles); - } - else - { - LoadBPReg(bp_cmd, total_cycles); - INCSTAT(g_stats.this_frame.num_bp_loads); - } - } - break; - - // draw primitives - default: - if ((cmd_byte & 0xC0) == 0x80) - { - // load vertices - if (src.size() < 2) - return finish_up(); - - const u16 num_vertices = src.Read(); - const int bytes = VertexLoaderManager::RunVertices( - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - static_cast((cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT), - num_vertices, src, is_preprocess); - - if (bytes < 0) - return finish_up(); - - src.Skip(bytes); - - // 4 GPU ticks per vertex, 3 CPU ticks per GPU tick - total_cycles += num_vertices * 4 * 3 + 6; - } - else - { - if (!s_is_fifo_error_seen) - CommandProcessor::HandleUnknownOpcode(cmd_byte, opcode_start, is_preprocess); - ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", cmd_byte, - fmt::ptr(opcode_start), is_preprocess ? 
"yes" : "no"); - s_is_fifo_error_seen = true; - total_cycles += 1; - } - break; - } - - // Display lists get added directly into the FIFO stream if constexpr (!is_preprocess) { - if (g_record_fifo_data && static_cast(cmd_byte) != Opcode::GX_CMD_CALL_DL) + // HACK + LoadXFReg(count, address, + DataReader{const_cast(data), const_cast(data) + count * sizeof(u32)}); + + INCSTAT(g_stats.this_frame.num_xf_loads); + } + } + OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) + { + m_cycles += 12; + if constexpr (!is_preprocess) + { + // TODO: Move all dirty state checking here or to VertexLoaderManager, + // instead of it being in CPState + if (command == MATINDEX_A) + VertexShaderManager::SetTexMatrixChangedA(value); + else if (command == MATINDEX_B) + VertexShaderManager::SetTexMatrixChangedB(value); + + INCSTAT(g_stats.this_frame.num_cp_loads); + } + GetCPState().LoadCPReg(command, value); + } + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) + { + m_cycles += 12; + + if constexpr (is_preprocess) + { + LoadBPRegPreprocess(command, value, m_cycles); + } + else + { + LoadBPReg(command, value, m_cycles); + INCSTAT(g_stats.this_frame.num_bp_loads); + } + } + OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) + { + m_cycles += 6; + + if constexpr (is_preprocess) + PreprocessIndexedXF(array, index, address, size); + else + LoadIndexedXF(array, index, address, size); + } + OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, + u32 vertex_size, u16 num_vertices, const u8* vertex_data)) + { + // load vertices + const u32 size = vertex_size * num_vertices; + + // HACK + DataReader src{const_cast(vertex_data), const_cast(vertex_data) + size}; + const u32 bytes = + VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess); + + ASSERT(bytes == size); + + // 4 GPU ticks per vertex, 3 CPU ticks per GPU tick + m_cycles += num_vertices * 4 * 3 + 6; + } + // This can't be inlined since it calls Run, 
which makes it recursive + // m_in_display_list prevents it from actually recursing infinitely, but there's no real benefit + // to inlining Run for the display list directly. + OPCODE_CALLBACK_NOINLINE(void OnDisplayList(u32 address, u32 size)) + { + m_cycles += 6; + + if (m_in_display_list) + { + WARN_LOG_FMT(VIDEO, "recursive display list detected"); + } + else + { + m_in_display_list = true; + + if constexpr (is_preprocess) { - const u8* const opcode_end = src.GetPointer(); - FifoRecorder::GetInstance().WriteGPCommand(opcode_start, u32(opcode_end - opcode_start)); + const u8* const start_address = Memory::GetPointer(address); + + Fifo::PushFifoAuxBuffer(start_address, size); + + if (start_address != nullptr) + { + Run(start_address, size, *this); + } + } + else + { + const u8* start_address; + + if (Fifo::UseDeterministicGPUThread()) + start_address = static_cast(Fifo::PopFifoAuxBuffer(size)); + else + start_address = Memory::GetPointer(address); + + // Avoid the crash if Memory::GetPointer failed .. + if (start_address != nullptr) + { + // temporarily swap dl and non-dl (small "hack" for the stats) + g_stats.SwapDL(); + + Run(start_address, size, *this); + INCSTAT(g_stats.this_frame.num_dlists_called); + + // un-swap + g_stats.SwapDL(); + } + } + + m_in_display_list = false; + } + } + OPCODE_CALLBACK(void OnNop(u32 count)) + { + m_cycles += 6 * count; // Hm, this means that we scan over nop streams pretty slowly... 
+ } + OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) + { + if (static_cast(opcode) == Opcode::GX_UNKNOWN_RESET) + { + // Datel software uses this command + m_cycles += 6; + DEBUG_LOG_FMT(VIDEO, "GX Reset?"); + } + else if (static_cast(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS) + { + // 'Zelda Four Swords' calls it and checks the metrics registers after that + m_cycles += 6; + DEBUG_LOG_FMT(VIDEO, "GX 0x44"); + } + else if (static_cast(opcode) == Opcode::GX_CMD_INVL_VC) + { + // Invalidate Vertex Cache + m_cycles += 6; + DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)"); + } + else + { + if (!s_is_fifo_error_seen) + CommandProcessor::HandleUnknownOpcode(opcode, data, is_preprocess); + ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", opcode, + fmt::ptr(data), is_preprocess ? "yes" : "no"); + s_is_fifo_error_seen = true; + m_cycles += 1; + } + } + + OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) + { + ASSERT(size >= 1); + if constexpr (!is_preprocess) + { + // Display lists get added directly into the FIFO stream since this same callback is used to + // process them. 
+ if (g_record_fifo_data && static_cast(data[0]) != Opcode::GX_CMD_CALL_DL) + { + FifoRecorder::GetInstance().WriteGPCommand(data, size); } } } + + OPCODE_CALLBACK(CPState& GetCPState()) + { + if constexpr (is_preprocess) + return g_preprocess_cp_state; + else + return g_main_cp_state; + } + + u32 m_cycles = 0; + bool m_in_display_list = false; +}; + +template +u8* RunFifo(DataReader src, u32* cycles) +{ + using CallbackT = RunCallback; + auto callback = CallbackT{}; + u32 size = Run(src.GetPointer(), static_cast(src.size()), callback); + + if (cycles != nullptr) + *cycles = callback.m_cycles; + + src.Skip(size); + return src.GetPointer(); } -template u8* Run(DataReader src, u32* cycles, bool in_display_list); -template u8* Run(DataReader src, u32* cycles, bool in_display_list); +template u8* RunFifo(DataReader src, u32* cycles); +template u8* RunFifo(DataReader src, u32* cycles); } // namespace OpcodeDecoder diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index df1059f221..2d1632efc3 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -3,9 +3,17 @@ #pragma once +#include + +#include "Common/Assert.h" #include "Common/CommonTypes.h" #include "Common/EnumFormatter.h" +#include "Common/Inline.h" +#include "Common/Swap.h" +#include "VideoCommon/CPMemory.h" +#include "VideoCommon/VertexLoaderBase.h" +struct CPState; class DataReader; namespace OpcodeDecoder @@ -55,8 +63,220 @@ enum class Primitive : u8 void Init(); +// Interface for the Run and RunCommand functions below. +// The functions themselves are templates so that the compiler generates separate versions for each +// callback (with the callback functions inlined), so the callback doesn't actually need to be +// publicly inherited. +// Compilers don't generate warnings for failed inlining with virtual functions, so this define +// allows disabling the use of virtual functions to generate those warnings. 
However, this means +// that missing functions will generate errors on their use in RunCommand, instead of in the +// subclass, which can be confusing. +#define OPCODE_CALLBACK_USE_INHERITANCE + +#ifdef OPCODE_CALLBACK_USE_INHERITANCE +#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig override +#define OPCODE_CALLBACK_NOINLINE(sig) sig override +#else +#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig +#define OPCODE_CALLBACK_NOINLINE(sig) sig +#endif +class Callback +{ +#ifdef OPCODE_CALLBACK_USE_INHERITANCE +public: + virtual ~Callback() = default; + + // Called on any XF command. + virtual void OnXF(u16 address, u8 count, const u8* data) = 0; + // Called on any CP command. + // Subclasses should update the CP state with GetCPState().LoadCPReg(command, value) so that + // primitive commands decode properly. + virtual void OnCP(u8 command, u32 value) = 0; + // Called on any BP command. + virtual void OnBP(u8 command, u32 value) = 0; + // Called on any indexed XF load command. + virtual void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) = 0; + // Called on any primitive command. + virtual void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size, + u16 num_vertices, const u8* vertex_data) = 0; + // Called on a display list. + virtual void OnDisplayList(u32 address, u32 size) = 0; + // Called on any NOP commands (which are all merged into a single call). + virtual void OnNop(u32 count) = 0; + // Called on an unknown opcode, or an opcode that is known but not implemented. + // data[0] is opcode. + virtual void OnUnknown(u8 opcode, const u8* data) = 0; + + // Called on ANY command. The first byte of data is the opcode. Size will be at least 1. + // This function is called after one of the above functions is called. + virtual void OnCommand(const u8* data, u32 size) = 0; + + // Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands. 
+ virtual CPState& GetCPState() = 0; +#endif +}; + +namespace detail +{ +// Main logic; split so that the main RunCommand can call OnCommand with the returned size. +template >> +static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback) +{ + if (available < 1) + return 0; + + const Opcode cmd = static_cast(data[0]); + + switch (cmd) + { + case Opcode::GX_NOP: + { + u32 count = 1; + while (count < available && static_cast(data[count]) == Opcode::GX_NOP) + count++; + callback.OnNop(count); + return count; + } + + case Opcode::GX_LOAD_CP_REG: + { + if (available < 6) + return 0; + + const u8 cmd2 = data[1]; + const u32 value = Common::swap32(&data[2]); + + callback.OnCP(cmd2, value); + + return 6; + } + + case Opcode::GX_LOAD_XF_REG: + { + if (available < 5) + return 0; + + const u32 cmd2 = Common::swap32(&data[1]); + const u16 base_address = cmd2 & 0xffff; + + const u16 stream_size_temp = cmd2 >> 16; + ASSERT(stream_size_temp < 16); + const u8 stream_size = (stream_size_temp & 0xf) + 1; + + if (available < u32(5 + stream_size * 4)) + return 0; + + callback.OnXF(base_address, stream_size, &data[5]); + + return 5 + stream_size * 4; + } + + case Opcode::GX_LOAD_INDX_A: // Used for position matrices + case Opcode::GX_LOAD_INDX_B: // Used for normal matrices + case Opcode::GX_LOAD_INDX_C: // Used for postmatrices + case Opcode::GX_LOAD_INDX_D: // Used for lights + { + if (available < 5) + return 0; + + const u32 value = Common::swap32(&data[1]); + + const u32 index = value >> 16; + const u16 address = value & 0xFFF; // TODO: check mask + const u8 size = ((value >> 12) & 0xF) + 1; + + // Map the command byte to its ref array. + // GX_LOAD_INDX_A (32 = 8*4) . CPArray::XF_A (4+8 = 12) + // GX_LOAD_INDX_B (40 = 8*5) . CPArray::XF_B (5+8 = 13) + // GX_LOAD_INDX_C (48 = 8*6) . CPArray::XF_C (6+8 = 14) + // GX_LOAD_INDX_D (56 = 8*7) . 
CPArray::XF_D (7+8 = 15) + const auto ref_array = static_cast((static_cast(cmd) / 8) + 8); + + callback.OnIndexedLoad(ref_array, index, address, size); + return 5; + } + + case Opcode::GX_CMD_CALL_DL: + { + if (available < 9) + return 0; + + const u32 address = Common::swap32(&data[1]); + const u32 size = Common::swap32(&data[5]); + + callback.OnDisplayList(address, size); + return 9; + } + + case Opcode::GX_LOAD_BP_REG: + { + if (available < 5) + return 0; + + const u8 cmd2 = data[1]; + const u32 value = Common::swap24(&data[2]); + + callback.OnBP(cmd2, value); + + return 5; + } + + default: + if (cmd >= Opcode::GX_PRIMITIVE_START && cmd <= Opcode::GX_PRIMITIVE_END) + { + if (available < 3) + return 0; + + const u8 cmdbyte = static_cast(cmd); + const OpcodeDecoder::Primitive primitive = static_cast( + (cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT); + const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK; + + const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc, + callback.GetCPState().vtx_attr[vat]); + const u16 num_vertices = Common::swap16(&data[1]); + + if (available < 3 + num_vertices * vertex_size) + return 0; + + callback.OnPrimitiveCommand(primitive, vat, vertex_size, num_vertices, &data[3]); + + return 3 + num_vertices * vertex_size; + } + } + + callback.OnUnknown(static_cast(cmd), data); + return 1; +} +} // namespace detail + +template >> +DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback) +{ + const u32 size = detail::RunCommand(data, available, callback); + if (size > 0) + { + callback.OnCommand(data, size); + } + return size; +} + +template >> +DOLPHIN_FORCE_INLINE u32 Run(const u8* data, u32 available, T& callback) +{ + u32 size = 0; + while (size < available) + { + const u32 command_size = RunCommand(&data[size], available - size, callback); + if (command_size == 0) + break; + size += command_size; + } + return size; +} + template -u8* Run(DataReader 
src, u32* cycles, bool in_display_list); +u8* RunFifo(DataReader src, u32* cycles); } // namespace OpcodeDecoder diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index e606a464e6..16d36f7453 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -963,7 +963,7 @@ void Renderer::RecordVideoMemory() const u32* xfregs_ptr = reinterpret_cast(&xfmem) + FifoDataFile::XF_MEM_SIZE; u32 xfregs_size = sizeof(XFMemory) / 4 - FifoDataFile::XF_MEM_SIZE; - FillCPMemoryArray(cpmem); + g_main_cp_state.FillCPMemoryArray(cpmem); FifoRecorder::GetInstance().SetVideoMemory(bpmem_ptr, cpmem, xfmem_ptr, xfregs_ptr, xfregs_size, texMem); diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index 6663e6c8ae..75afefed16 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -405,7 +405,7 @@ void VertexLoaderARM64::GenerateVertexLoader() MOV(skipped_reg, ARM64Reg::WZR); MOV(saved_count, count_reg); - MOVP2R(stride_reg, g_main_cp_state.array_strides); + MOVP2R(stride_reg, g_main_cp_state.array_strides.data()); MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases); if (need_scale) diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index e2475d666f..901e14044f 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -12,17 +12,14 @@ #include #include -#include "Common/Assert.h" #include "Common/CommonTypes.h" #include "Common/EnumMap.h" #include "Common/Logging/Log.h" -#include "Core/DolphinAnalytics.h" #include "Core/HW/Memmap.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/CPMemory.h" -#include "VideoCommon/CommandProcessor.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" @@ -298,147 
+295,3 @@ NativeVertexFormat* GetCurrentVertexFormat() } } // namespace VertexLoaderManager - -void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess) -{ - bool update_global_state = !is_preprocess; - CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state; - switch (sub_cmd & CP_COMMAND_MASK) - { - case UNKNOWN_00: - case UNKNOWN_10: - case UNKNOWN_20: - if (!(sub_cmd == UNKNOWN_20 && value == 0)) - { - // All titles using libogc or the official SDK issue 0x20 with value=0 on startup - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND); - DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}", - sub_cmd); - } - break; - - case MATINDEX_A: - if (sub_cmd != MATINDEX_A) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP MATINDEX_A: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - MATINDEX_A, sub_cmd); - } - - if (update_global_state) - VertexShaderManager::SetTexMatrixChangedA(value); - break; - - case MATINDEX_B: - if (sub_cmd != MATINDEX_B) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP MATINDEX_B: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - MATINDEX_B, sub_cmd); - } - - if (update_global_state) - VertexShaderManager::SetTexMatrixChangedB(value); - break; - - case VCD_LO: - if (sub_cmd != VCD_LO) // Stricter than YAGCD - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP VCD_LO: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - VCD_LO, sub_cmd); - } - - state->vtx_desc.low.Hex = value; - state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG); - state->bases_dirty = true; - break; - - case VCD_HI: - if (sub_cmd != VCD_HI) // Stricter than YAGCD - { - 
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, - "CP VCD_HI: an exact value of {:02x} was expected " - "but instead a value of {:02x} was seen", - VCD_HI, sub_cmd); - } - - state->vtx_desc.high.Hex = value; - state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG); - state->bases_dirty = true; - break; - - case CP_VAT_REG_A: - if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A); - } - state->vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value; - state->attr_dirty[sub_cmd & CP_VAT_MASK] = true; - break; - - case CP_VAT_REG_B: - if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B); - } - state->vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value; - state->attr_dirty[sub_cmd & CP_VAT_MASK] = true; - break; - - case CP_VAT_REG_C: - if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG) - { - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C); - } - state->vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value; - state->attr_dirty[sub_cmd & CP_VAT_MASK] = true; - break; - - // Pointers to vertex arrays in GC RAM - case ARRAY_BASE: - state->array_bases[static_cast(sub_cmd & CP_ARRAY_MASK)] = - value & CommandProcessor::GetPhysicalAddressMask(); - state->bases_dirty = true; - break; - - case ARRAY_STRIDE: - state->array_strides[static_cast(sub_cmd & CP_ARRAY_MASK)] = value & 0xFF; - break; - - default: - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND); - WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value); - } -} - -void 
FillCPMemoryArray(u32* memory) -{ - memory[MATINDEX_A] = g_main_cp_state.matrix_index_a.Hex; - memory[MATINDEX_B] = g_main_cp_state.matrix_index_b.Hex; - memory[VCD_LO] = g_main_cp_state.vtx_desc.low.Hex; - memory[VCD_HI] = g_main_cp_state.vtx_desc.high.Hex; - - for (int i = 0; i < CP_NUM_VAT_REG; ++i) - { - memory[CP_VAT_REG_A + i] = g_main_cp_state.vtx_attr[i].g0.Hex; - memory[CP_VAT_REG_B + i] = g_main_cp_state.vtx_attr[i].g1.Hex; - memory[CP_VAT_REG_C + i] = g_main_cp_state.vtx_attr[i].g2.Hex; - } - - for (u8 i = 0; i < CP_NUM_ARRAYS; ++i) - { - memory[ARRAY_BASE + i] = g_main_cp_state.array_bases[static_cast(i)]; - memory[ARRAY_STRIDE + i] = g_main_cp_state.array_strides[static_cast(i)]; - } -} diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h index 32c5dbb607..4fb538b09e 100644 --- a/Source/Core/VideoCommon/XFMemory.h +++ b/Source/Core/VideoCommon/XFMemory.h @@ -454,10 +454,10 @@ struct XFMemory u32 unk9[8]; // 0x1048 - 0x104f PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057 }; -static_assert(sizeof(XFMemory) == sizeof(u32) * 0x1058); +static_assert(sizeof(XFMemory) == sizeof(u32) * XFMEM_REGISTERS_END); extern XFMemory xfmem; void LoadXFReg(u32 transferSize, u32 address, DataReader src); -void LoadIndexedXF(CPArray array, u32 val); -void PreprocessIndexedXF(CPArray array, u32 val); +void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size); +void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index c31b8cbb54..65ebd548b8 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -264,19 +264,9 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src) } } -constexpr std::tuple ExtractIndexedXF(u32 val) -{ - const u32 index = val >> 16; - const u32 address = val & 0xFFF; // check mask - const u32 size = ((val >> 12) & 0xF) + 1; - - return {index, address, 
size}; -} - // TODO - verify that it is correct. Seems to work, though. -void LoadIndexedXF(CPArray array, u32 val) +void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size) { - const auto [index, address, size] = ExtractIndexedXF(val); // load stuff from array to address in xf mem u32* currData = (u32*)(&xfmem) + address; @@ -307,10 +297,8 @@ void LoadIndexedXF(CPArray array, u32 val) } } -void PreprocessIndexedXF(CPArray array, u32 val) +void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size) { - const auto [index, address, size] = ExtractIndexedXF(val); - const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[array] + g_preprocess_cp_state.array_strides[array] * index); @@ -581,13 +569,9 @@ std::string GetXFMemDescription(u32 address, u32 value) } } -std::pair GetXFTransferInfo(const u8* data) +std::pair GetXFTransferInfo(u16 base_address, u8 transfer_size, + const u8* data) { - const u32 cmd = Common::swap32(data); - data += 4; - u32 base_address = cmd & 0xFFFF; - const u32 transfer_size = ((cmd >> 16) & 15) + 1; - if (base_address > XFMEM_REGISTERS_END) { return std::make_pair("Invalid XF Transfer", "Base address past end of address space"); @@ -655,10 +639,9 @@ std::pair GetXFTransferInfo(const u8* data) return std::make_pair(fmt::to_string(name), fmt::to_string(desc)); } -std::pair GetXFIndexedLoadInfo(CPArray array, u32 value) +std::pair GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address, + u8 size) { - const auto [index, address, size] = ExtractIndexedXF(value); - const auto desc = fmt::format("Load {} bytes to XF address {:03x} from CP array {} row {}", size, address, array, index); fmt::memory_buffer written; diff --git a/Source/Core/VideoCommon/XFStructs.h b/Source/Core/VideoCommon/XFStructs.h index 7e1cc2c49f..caf197b7f0 100644 --- a/Source/Core/VideoCommon/XFStructs.h +++ b/Source/Core/VideoCommon/XFStructs.h @@ -11,5 +11,7 @@ std::pair GetXFRegInfo(u32 address, u32 value); std::string 
GetXFMemName(u32 address); std::string GetXFMemDescription(u32 address, u32 value); -std::pair GetXFTransferInfo(const u8* data); -std::pair GetXFIndexedLoadInfo(CPArray array, u32 value); +std::pair GetXFTransferInfo(u16 base_address, u8 transfer_size, + const u8* data); +std::pair GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address, + u8 size); From d84d695fdf675a7668f23bf9db901cef9311a7df Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 6 May 2021 17:22:31 -0700 Subject: [PATCH 12/23] Remove DataReader from LoadXFReg --- Source/Core/Core/FifoPlayer/FifoRecorder.cpp | 4 +- Source/Core/VideoCommon/OpcodeDecoding.cpp | 4 +- Source/Core/VideoCommon/XFMemory.h | 4 +- Source/Core/VideoCommon/XFStructs.cpp | 111 ++++++++----------- 4 files changed, 50 insertions(+), 73 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp index 47cb25d84b..a47877ef4f 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp @@ -86,8 +86,8 @@ void FifoRecorder::FifoRecordAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primiti } const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat, vtx_attr.g0.PosElements); - ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size, num_vertices, - vertex_data); + ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size, + num_vertices, vertex_data); offset += pos_size; const u32 norm_size = diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index 239aec158a..26e0f2da93 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -50,9 +50,7 @@ public: if constexpr (!is_preprocess) { - // HACK - LoadXFReg(count, address, - DataReader{const_cast(data), const_cast(data) + count * sizeof(u32)}); + LoadXFReg(address, count, data); 
INCSTAT(g_stats.this_frame.num_xf_loads); } diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h index 4fb538b09e..189f695f41 100644 --- a/Source/Core/VideoCommon/XFMemory.h +++ b/Source/Core/VideoCommon/XFMemory.h @@ -13,8 +13,6 @@ #include "Common/EnumFormatter.h" #include "VideoCommon/CPMemory.h" -class DataReader; - constexpr size_t NUM_XF_COLOR_CHANNELS = 2; // Lighting @@ -458,6 +456,6 @@ static_assert(sizeof(XFMemory) == sizeof(u32) * XFMEM_REGISTERS_END); extern XFMemory xfmem; -void LoadXFReg(u32 transferSize, u32 address, DataReader src); +void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data); void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size); void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index 65ebd548b8..665e2ac22d 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -12,7 +12,6 @@ #include "Core/HW/Memmap.h" #include "VideoCommon/CPMemory.h" -#include "VideoCommon/DataReader.h" #include "VideoCommon/Fifo.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/PixelShaderManager.h" @@ -26,16 +25,10 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress) VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize); } -static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) +static void XFRegWritten(u32 address, u32 value) { - u32 address = baseAddress; - u32 dataIndex = 0; - - while (transferSize > 0 && address < XFMEM_REGISTERS_END) + if (address >= XFMEM_REGISTERS_START && address < XFMEM_REGISTERS_END) { - u32 newValue = src.Peek(dataIndex * sizeof(u32)); - u32 nextAddress = address + 1; - switch (address) { case XFMEM_ERROR: @@ -44,12 +37,12 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_STATE1: // internal state 1 case 
XFMEM_CLOCK: case XFMEM_SETGPMETRIC: - nextAddress = 0x1007; + // Not implemented break; case XFMEM_CLIPDISABLE: { - ClipDisable setting{.hex = newValue}; + ClipDisable setting{.hex = value}; if (setting.disable_clipping_detection) DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::SETS_XF_CLIPDISABLE_BIT_0); if (setting.disable_trivial_rejection) @@ -63,7 +56,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) break; case XFMEM_SETNUMCHAN: - if (xfmem.numChan.numColorChans != (newValue & 3)) + if (xfmem.numChan.numColorChans != (value & 3)) g_vertex_manager->Flush(); VertexShaderManager::SetLightingConfigChanged(); break; @@ -72,7 +65,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETCHAN1_AMBCOLOR: { u8 chan = address - XFMEM_SETCHAN0_AMBCOLOR; - if (xfmem.ambColor[chan] != newValue) + if (xfmem.ambColor[chan] != value) { g_vertex_manager->Flush(); VertexShaderManager::SetMaterialColorChanged(chan); @@ -84,7 +77,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETCHAN1_MATCOLOR: { u8 chan = address - XFMEM_SETCHAN0_MATCOLOR; - if (xfmem.matColor[chan] != newValue) + if (xfmem.matColor[chan] != value) { g_vertex_manager->Flush(); VertexShaderManager::SetMaterialColorChanged(chan + 2); @@ -96,22 +89,22 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETCHAN1_COLOR: case XFMEM_SETCHAN0_ALPHA: // Channel Alpha case XFMEM_SETCHAN1_ALPHA: - if (((u32*)&xfmem)[address] != (newValue & 0x7fff)) + if (((u32*)&xfmem)[address] != (value & 0x7fff)) g_vertex_manager->Flush(); VertexShaderManager::SetLightingConfigChanged(); break; case XFMEM_DUALTEX: - if (xfmem.dualTexTrans.enabled != bool(newValue & 1)) + if (xfmem.dualTexTrans.enabled != bool(value & 1)) g_vertex_manager->Flush(); VertexShaderManager::SetTexMatrixInfoChanged(-1); break; case XFMEM_SETMATRIXINDA: - 
VertexShaderManager::SetTexMatrixChangedA(newValue); + VertexShaderManager::SetTexMatrixChangedA(value); break; case XFMEM_SETMATRIXINDB: - VertexShaderManager::SetTexMatrixChangedB(newValue); + VertexShaderManager::SetTexMatrixChangedB(value); break; case XFMEM_SETVIEWPORT: @@ -124,8 +117,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) VertexShaderManager::SetViewportChanged(); PixelShaderManager::SetViewportChanged(); GeometryShaderManager::SetViewportChanged(); - - nextAddress = XFMEM_SETVIEWPORT + 6; break; case XFMEM_SETPROJECTION: @@ -138,12 +129,10 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) g_vertex_manager->Flush(); VertexShaderManager::SetProjectionChanged(); GeometryShaderManager::SetProjectionChanged(); - - nextAddress = XFMEM_SETPROJECTION + 7; break; case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens - if (xfmem.numTexGen.numTexGens != (newValue & 15)) + if (xfmem.numTexGen.numTexGens != (value & 15)) g_vertex_manager->Flush(); break; @@ -157,8 +146,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETTEXMTXINFO + 7: g_vertex_manager->Flush(); VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO); - - nextAddress = XFMEM_SETTEXMTXINFO + 8; break; case XFMEM_SETPOSTMTXINFO: @@ -171,8 +158,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case XFMEM_SETPOSTMTXINFO + 7: g_vertex_manager->Flush(); VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSTMTXINFO); - - nextAddress = XFMEM_SETPOSTMTXINFO + 8; break; // -------------- @@ -189,7 +174,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) case 0x104e: case 0x104f: DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); - DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, newValue); + DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, 
value); break; case 0x1013: @@ -200,66 +185,62 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) default: DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); - WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, newValue); + WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, value); break; } - - int transferred = nextAddress - address; - address = nextAddress; - - transferSize -= transferred; - dataIndex += transferred; } } -void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src) +void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data) { - // do not allow writes past registers - if (baseAddress + transferSize > XFMEM_REGISTERS_END) + if (base_address > XFMEM_REGISTERS_END) { - WARN_LOG_FMT(VIDEO, "XF load exceeds address space: {:x} {} bytes", baseAddress, transferSize); - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND); + WARN_LOG_FMT(VIDEO, "XF load base address past end of address space: {:x} {} bytes", + base_address, transfer_size); + return; + } - if (baseAddress >= XFMEM_REGISTERS_END) - transferSize = 0; - else - transferSize = XFMEM_REGISTERS_END - baseAddress; + u32 end_address = base_address + transfer_size; // exclusive + + // do not allow writes past registers + if (end_address > XFMEM_REGISTERS_END) + { + WARN_LOG_FMT(VIDEO, "XF load ends past end of address space: {:x} {} bytes", base_address, + transfer_size); + end_address = XFMEM_REGISTERS_END; } // write to XF mem - if (baseAddress < XFMEM_REGISTERS_START && transferSize > 0) + if (base_address < XFMEM_REGISTERS_START) { - u32 end = baseAddress + transferSize; + const u32 xf_mem_base = base_address; + u32 xf_mem_transfer_size = transfer_size; - u32 xfMemBase = baseAddress; - u32 xfMemTransferSize = transferSize; - - if (end >= XFMEM_REGISTERS_START) + if (end_address > XFMEM_REGISTERS_START) { - xfMemTransferSize = XFMEM_REGISTERS_START - baseAddress; - - baseAddress = 
XFMEM_REGISTERS_START; - transferSize = end - XFMEM_REGISTERS_START; - } - else - { - transferSize = 0; + xf_mem_transfer_size = XFMEM_REGISTERS_START - base_address; + base_address = XFMEM_REGISTERS_START; } - XFMemWritten(xfMemTransferSize, xfMemBase); - for (u32 i = 0; i < xfMemTransferSize; i++) + XFMemWritten(xf_mem_transfer_size, xf_mem_base); + for (u32 i = 0; i < xf_mem_transfer_size; i++) { - ((u32*)&xfmem)[xfMemBase + i] = src.Read(); + ((u32*)&xfmem)[xf_mem_base + i] = Common::swap32(data); + data += 4; } } // write to XF regs - if (transferSize > 0) + if (base_address >= XFMEM_REGISTERS_START) { - XFRegWritten(transferSize, baseAddress, src); - for (u32 i = 0; i < transferSize; i++) + for (u32 address = base_address; address < end_address; address++) { - ((u32*)&xfmem)[baseAddress + i] = src.Read(); + const u32 value = Common::swap32(data); + + XFRegWritten(address, value); + ((u32*)&xfmem)[address] = value; + + data += 4; } } } From e4605fa39955845145457aaa1f6ef227b7c6f78a Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 24 Aug 2021 12:18:28 -0700 Subject: [PATCH 13/23] Fifo analyzer: Create a new object for each EFB copy Previously, EFB copies would be in the middle of other objects, as objects were only split on primitive data. A distinct object for each EFB copy makes them easier to spot, but does also mean there are more objects that do nothing when disabled (as disabling an object only skips primitive data, and there is no primitive data for EFB copies). 
--- Source/Core/Core/FifoPlayer/FifoPlayer.cpp | 29 ++++++++++++++++----- Source/Core/Core/FifoPlayer/FifoPlayer.h | 4 ++- Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp | 17 ++++-------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index bbb08a7ddd..58add3faaa 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -42,7 +42,7 @@ public: OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {} OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); } - OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {} + OPCODE_CALLBACK(void OnBP(u8 command, u32 value)); OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) {} OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size, u16 num_vertices, @@ -57,9 +57,11 @@ public: bool m_start_of_primitives = false; bool m_end_of_primitives = false; + bool m_efb_copy = false; // Internal state, copied to above in OnCommand bool m_was_primitive = false; bool m_is_primitive = false; + bool m_is_copy = false; bool m_is_nop = false; CPState m_cpmem; }; @@ -103,18 +105,27 @@ void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file, } offset += cmd_size; + + if (analyzer.m_efb_copy) + { + // We increase the offset beforehand, so that the trigger EFB copy command is included. + analyzed.AddPart(FramePartType::EFBCopy, part_start, offset, analyzer.m_cpmem); + part_start = offset; + } } - if (part_start != offset) - { - // Remaining data, usually without any primitives - analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem); - } - + // The frame should end with an EFB copy, so part_start should have been updated to the end. 
+ ASSERT(part_start == frame.fifoData.size()); ASSERT(offset == frame.fifoData.size()); } } +void FifoPlaybackAnalyzer::OnBP(u8 command, u32 value) +{ + if (command == BPMEM_TRIGGER_EFB_COPY) + m_is_copy = true; +} + void FifoPlaybackAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size, u16 num_vertices, const u8* vertex_data) @@ -131,6 +142,7 @@ void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size) { m_start_of_primitives = false; m_end_of_primitives = false; + m_efb_copy = false; if (!m_is_nop) { @@ -138,10 +150,13 @@ void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size) m_start_of_primitives = true; else if (m_was_primitive && !m_is_primitive) m_end_of_primitives = true; + else if (m_is_copy) + m_efb_copy = true; m_was_primitive = m_is_primitive; } m_is_primitive = false; + m_is_copy = false; m_is_nop = false; } } // namespace diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.h b/Source/Core/Core/FifoPlayer/FifoPlayer.h index ffae2e92d4..4e2e0ffed7 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.h +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -56,6 +57,7 @@ enum class FramePartType { Commands, PrimitiveData, + EFBCopy, }; struct FramePart @@ -74,7 +76,7 @@ struct FramePart struct AnalyzedFrameInfo { std::vector parts; - Common::EnumMap part_type_counts; + Common::EnumMap part_type_counts; void AddPart(FramePartType type, u32 start, u32 end, const CPState& cpmem) { diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index 9135d70e09..4f76f2d263 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -160,7 +160,7 @@ void FIFOAnalyzer::UpdateTree() const AnalyzedFrameInfo& frame_info = FifoPlayer::GetInstance().GetAnalyzedFrameInfo(frame); ASSERT(frame_info.parts.size() != 0); - Common::EnumMap part_counts; + Common::EnumMap part_counts; 
u32 part_start = 0; for (u32 part_nr = 0; part_nr < frame_info.parts.size(); part_nr++) @@ -173,6 +173,8 @@ void FIFOAnalyzer::UpdateTree() QTreeWidgetItem* object_item = nullptr; if (part.m_type == FramePartType::PrimitiveData) object_item = new QTreeWidgetItem({tr("Object %1").arg(part_type_nr)}); + else if (part.m_type == FramePartType::EFBCopy) + object_item = new QTreeWidgetItem({tr("EFB copy %1").arg(part_type_nr)}); // We don't create dedicated labels for FramePartType::Command; // those are grouped with the primitive @@ -188,17 +190,8 @@ void FIFOAnalyzer::UpdateTree() } } - // Final data (the XFB copy) - if (part_start != frame_info.parts.size()) - { - QTreeWidgetItem* object_item = new QTreeWidgetItem({tr("Final Data")}); - frame_item->addChild(object_item); - - object_item->setData(0, FRAME_ROLE, frame); - object_item->setData(0, PART_START_ROLE, part_start); - object_item->setData(0, PART_END_ROLE, u32(frame_info.parts.size() - 1)); - } - + // We shouldn't end on a Command (it should end with an EFB copy) + ASSERT(part_start == frame_info.parts.size()); // The counts we computed should match the frame's counts ASSERT(std::equal(frame_info.part_type_counts.begin(), frame_info.part_type_counts.end(), part_counts.begin())); From d039b1bc0dfaba2a88036468b6d971bb1d7e463d Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 13 May 2021 16:05:31 -0700 Subject: [PATCH 14/23] VideoCommon: Move VertexLoaderManager logic out of CPState --- Source/Core/VideoCommon/CPMemory.cpp | 11 ++----- Source/Core/VideoCommon/CPMemory.h | 6 ---- Source/Core/VideoCommon/OpcodeDecoding.cpp | 33 ++++++++++++++++--- Source/Core/VideoCommon/VertexLoaderARM64.cpp | 2 +- .../Core/VideoCommon/VertexLoaderManager.cpp | 24 +++++++------- Source/Core/VideoCommon/VertexLoaderManager.h | 8 +++++ .../Core/VideoCommon/VertexShaderManager.cpp | 1 + 7 files changed, 54 insertions(+), 31 deletions(-) diff --git a/Source/Core/VideoCommon/CPMemory.cpp b/Source/Core/VideoCommon/CPMemory.cpp index 
4781595e8c..0df0d9f1d8 100644 --- a/Source/Core/VideoCommon/CPMemory.cpp +++ b/Source/Core/VideoCommon/CPMemory.cpp @@ -9,6 +9,7 @@ #include "Common/Logging/Log.h" #include "Core/DolphinAnalytics.h" #include "VideoCommon/CommandProcessor.h" +#include "VideoCommon/VertexLoaderManager.h" // CP state CPState g_main_cp_state; @@ -28,7 +29,7 @@ void DoCPState(PointerWrap& p) if (p.mode == PointerWrap::MODE_READ) { CopyPreprocessCPStateFromMain(); - g_main_cp_state.bases_dirty = true; + VertexLoaderManager::g_bases_dirty = true; } } @@ -154,8 +155,6 @@ void CPState::LoadCPReg(u8 sub_cmd, u32 value) } vtx_desc.low.Hex = value; - attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG); - bases_dirty = true; break; case VCD_HI: @@ -169,8 +168,6 @@ void CPState::LoadCPReg(u8 sub_cmd, u32 value) } vtx_desc.high.Hex = value; - attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG); - bases_dirty = true; break; case CP_VAT_REG_A: @@ -180,7 +177,6 @@ void CPState::LoadCPReg(u8 sub_cmd, u32 value) WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A); } vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value; - attr_dirty[sub_cmd & CP_VAT_MASK] = true; break; case CP_VAT_REG_B: @@ -190,7 +186,6 @@ void CPState::LoadCPReg(u8 sub_cmd, u32 value) WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B); } vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value; - attr_dirty[sub_cmd & CP_VAT_MASK] = true; break; case CP_VAT_REG_C: @@ -200,14 +195,12 @@ void CPState::LoadCPReg(u8 sub_cmd, u32 value) WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C); } vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value; - attr_dirty[sub_cmd & CP_VAT_MASK] = true; break; // Pointers to vertex arrays in GC RAM case ARRAY_BASE: array_bases[static_cast(sub_cmd & CP_ARRAY_MASK)] = value & CommandProcessor::GetPhysicalAddressMask(); - bases_dirty = true; break; case ARRAY_STRIDE: diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 
defac506b2..52b7038e7c 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -646,12 +646,6 @@ struct CPState final TVtxDesc vtx_desc; // Most games only use the first VtxAttr and simply reconfigure it all the time as needed. std::array vtx_attr{}; - - // Attributes that actually belong to VertexLoaderManager: - BitSet32 attr_dirty{}; - bool bases_dirty = false; - VertexLoaderBase* vertex_loaders[CP_NUM_VAT_REG]{}; - int last_id = 0; }; static_assert(std::is_trivially_copyable_v); diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index 26e0f2da93..cefc88239d 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -58,17 +58,42 @@ public: OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { m_cycles += 12; + const u8 sub_command = command & CP_COMMAND_MASK; if constexpr (!is_preprocess) { - // TODO: Move all dirty state checking here or to VertexLoaderManager, - // instead of it being in CPState - if (command == MATINDEX_A) + if (sub_command == MATINDEX_A) VertexShaderManager::SetTexMatrixChangedA(value); - else if (command == MATINDEX_B) + else if (sub_command == MATINDEX_B) VertexShaderManager::SetTexMatrixChangedB(value); + else if (sub_command == VCD_LO || sub_command == VCD_HI) + { + VertexLoaderManager::g_main_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG); + VertexLoaderManager::g_bases_dirty = true; + } + else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B || + sub_command == CP_VAT_REG_C) + { + VertexLoaderManager::g_main_vat_dirty[command & CP_VAT_MASK] = true; + } + else if (sub_command == ARRAY_BASE) + { + VertexLoaderManager::g_bases_dirty = true; + } INCSTAT(g_stats.this_frame.num_cp_loads); } + else if constexpr (is_preprocess) + { + if (sub_command == VCD_LO || sub_command == VCD_HI) + { + VertexLoaderManager::g_preprocess_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG); + } + else if (sub_command == 
CP_VAT_REG_A || sub_command == CP_VAT_REG_B || + sub_command == CP_VAT_REG_C) + { + VertexLoaderManager::g_preprocess_vat_dirty[command & CP_VAT_MASK] = true; + } + } GetCPState().LoadCPReg(command, value); } OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index 75afefed16..106f1cfd9e 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -406,7 +406,7 @@ void VertexLoaderARM64::GenerateVertexLoader() MOV(saved_count, count_reg); MOVP2R(stride_reg, g_main_cp_state.array_strides.data()); - MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases); + MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data()); if (need_scale) MOVP2R(scale_reg, scale_factors); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 901e14044f..ef1e2f7aee 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -58,9 +58,9 @@ std::array g_preprocess_vertex_loaders; void Init() { MarkAllDirty(); - for (auto& map_entry : g_main_cp_state.vertex_loaders) + for (auto& map_entry : g_main_vertex_loaders) map_entry = nullptr; - for (auto& map_entry : g_preprocess_cp_state.vertex_loaders) + for (auto& map_entry : g_preprocess_vertex_loaders) map_entry = nullptr; SETSTAT(g_stats.num_vertex_loaders, 0); } @@ -75,7 +75,7 @@ void Clear() void UpdateVertexArrayPointers() { // Anything to update? 
- if (!g_main_cp_state.bases_dirty) + if (!g_bases_dirty) return; // Some games such as Burnout 2 can put invalid addresses into @@ -106,7 +106,7 @@ void UpdateVertexArrayPointers() Memory::GetPointer(g_main_cp_state.array_bases[CPArray::TexCoord0 + i]); } - g_main_cp_state.bases_dirty = false; + g_bases_dirty = false; } namespace @@ -121,8 +121,8 @@ struct entry void MarkAllDirty() { - g_main_cp_state.attr_dirty = BitSet32::AllTrue(8); - g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8); + g_main_vat_dirty = BitSet8::AllTrue(8); + g_preprocess_vat_dirty = BitSet8::AllTrue(8); } NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl) @@ -197,10 +197,12 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl) static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false) { CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state; - state->last_id = vtx_attr_group; + BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty; + auto& vertex_loaders = preprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders; + g_current_vat = vtx_attr_group; VertexLoaderBase* loader; - if (state->attr_dirty[vtx_attr_group]) + if (attr_dirty[vtx_attr_group]) { // We are not allowed to create a native vertex format on preprocessing as this is on the wrong // thread @@ -230,12 +232,12 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal native = g_renderer->CreateNativeVertexFormat(format); loader->m_native_vertex_format = native.get(); } - state->vertex_loaders[vtx_attr_group] = loader; - state->attr_dirty[vtx_attr_group] = false; + vertex_loaders[vtx_attr_group] = loader; + attr_dirty[vtx_attr_group] = false; } else { - loader = state->vertex_loaders[vtx_attr_group]; + loader = vertex_loaders[vtx_attr_group]; } // Lookup pointers for any vertex arrays.
diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index 4eeae12742..d6bda13c00 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -57,4 +58,11 @@ extern u32 position_matrix_index[4]; // VB_HAS_X. Bitmask telling what vertex components are present. extern u32 g_current_components; + +extern BitSet8 g_main_vat_dirty; +extern BitSet8 g_preprocess_vat_dirty; +extern bool g_bases_dirty; // Main only +extern u8 g_current_vat; // Main only +extern std::array g_main_vertex_loaders; +extern std::array g_preprocess_vertex_loaders; } // namespace VertexLoaderManager diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index 1b0bf55de7..25c7f79b30 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -23,6 +23,7 @@ #include "VideoCommon/FreeLookCamera.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" From 0afe318b55fd71688f2a8b6c19c50830796dbb01 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 16 May 2021 14:41:03 -0700 Subject: [PATCH 15/23] OpcodeDecoding: Make s_is_fifo_error_seen static --- Source/Core/VideoCommon/OpcodeDecoding.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index cefc88239d..63734db384 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -32,7 +32,7 @@ namespace OpcodeDecoder { -bool s_is_fifo_error_seen = false; +static bool s_is_fifo_error_seen = false; bool g_record_fifo_data = false; void Init() 
From 95e0f833f9fd16621cfd32536698aa9d98177e22 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 12 Jun 2021 15:06:06 -0700 Subject: [PATCH 16/23] Fifo analyzer: Display equations for color/alpha combiners --- Source/Core/Common/EnumFormatter.h | 21 ++- Source/Core/VideoCommon/BPMemory.h | 165 +++++++++++++++++- Source/UnitTests/Common/EnumFormatterTest.cpp | 13 ++ 3 files changed, 189 insertions(+), 10 deletions(-) diff --git a/Source/Core/Common/EnumFormatter.h b/Source/Core/Common/EnumFormatter.h index 1ab6bbeadd..26cc910438 100644 --- a/Source/Core/Common/EnumFormatter.h +++ b/Source/Core/Common/EnumFormatter.h @@ -55,9 +55,9 @@ public: constexpr auto parse(fmt::format_parse_context& ctx) { auto it = ctx.begin(), end = ctx.end(); - // 'u' for user display, 's' for shader generation - if (it != end && (*it == 'u' || *it == 's')) - formatting_for_shader = (*it++ == 's'); + // 'u' for user display, 's' for shader generation, 'n' for name only + if (it != end && (*it == 'u' || *it == 's' || *it == 'n')) + format_type = *it++; return it; } @@ -68,19 +68,24 @@ public: const auto value_u = static_cast>(value_s); // Always unsigned const bool has_name = m_names.InBounds(e) && m_names[e] != nullptr; - if (!formatting_for_shader) + switch (format_type) { + default: + case 'u': if (has_name) return fmt::format_to(ctx.out(), "{} ({})", m_names[e], value_s); else return fmt::format_to(ctx.out(), "Invalid ({})", value_s); - } - else - { + case 's': if (has_name) return fmt::format_to(ctx.out(), "{:#x}u /* {} */", value_u, m_names[e]); else return fmt::format_to(ctx.out(), "{:#x}u /* Invalid */", value_u); + case 'n': + if (has_name) + return fmt::format_to(ctx.out(), "{}", m_names[e]); + else + return fmt::format_to(ctx.out(), "Invalid ({})", value_s); } } @@ -92,5 +97,5 @@ protected: private: const array_type m_names; - bool formatting_for_shader = false; + char format_type = 'u'; }; diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 
97dc5993bd..f5009875e7 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -258,7 +258,7 @@ enum class TevBias : u32 { Zero = 0, AddHalf = 1, - Subhalf = 2, + SubHalf = 2, Compare = 3 }; template <> @@ -491,6 +491,94 @@ struct fmt::formatter template auto format(const TevStageCombiner::ColorCombiner& cc, FormatContext& ctx) { + auto out = ctx.out(); + if (cc.bias != TevBias::Compare) + { + // Generate an equation view, simplifying out addition of zero and multiplication by 1 + // dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale + // or equivalently and more readably when the terms are not constants: + // dest = (d (OP) lerp(a, b, c) + bias) * scale + // Note that lerping is more complex than the first form shows; see PixelShaderGen's + // WriteTevRegular for more details. + + static constexpr Common::EnumMap alt_names = { + "prev.rgb", "prev.aaa", "c0.rgb", "c0.aaa", "c1.rgb", "c1.aaa", "c2.rgb", "c2.aaa", + "tex.rgb", "tex.aaa", "ras.rgb", "ras.aaa", "1", ".5", "konst.rgb", "0", + }; + + const bool has_d = cc.d != TevColorArg::Zero; + // If c is one, (1 - c) is zero, so (1-c)*a is zero + const bool has_ac = cc.a != TevColorArg::Zero && cc.c != TevColorArg::One; + // If either b or c is zero, b*c is zero + const bool has_bc = cc.b != TevColorArg::Zero && cc.c != TevColorArg::Zero; + const bool has_bias = cc.bias != TevBias::Zero; // != Compare is already known + const bool has_scale = cc.scale != TevScale::Scale1; + + const char op = (cc.op == TevOp::Sub ? 
'-' : '+'); + + if (cc.dest == TevOutput::Prev) + out = format_to(out, "dest.rgb = "); + else + out = format_to(out, "{:n}.rgb = ", cc.dest); + + if (has_scale) + out = format_to(out, "("); + if (has_d) + out = format_to(out, "{}", alt_names[cc.d]); + if (has_ac || has_bc) + { + if (has_d) + out = format_to(out, " {} ", op); + else if (cc.op == TevOp::Sub) + out = format_to(out, "{}", op); + if (has_ac && has_bc) + { + if (cc.c == TevColorArg::Half) + { + // has_ac and has_bc imply that c is not Zero or One, and Half is the only remaining + // numeric constant. This results in an average. + out = format_to(out, "({} + {})/2", alt_names[cc.a], alt_names[cc.b]); + } + else + { + out = format_to(out, "lerp({}, {}, {})", alt_names[cc.a], alt_names[cc.b], + alt_names[cc.c]); + } + } + else if (has_ac) + { + if (cc.c == TevColorArg::Zero) + out = format_to(out, "{}", alt_names[cc.a]); + else if (cc.c == TevColorArg::Half) // 1 - .5 is .5 + out = format_to(out, ".5*{}", alt_names[cc.a]); + else + out = format_to(out, "(1 - {})*{}", alt_names[cc.c], alt_names[cc.a]); + } + else // has_bc + { + if (cc.c == TevColorArg::One) + out = format_to(out, "{}", alt_names[cc.b]); + else + out = format_to(out, "{}*{}", alt_names[cc.c], alt_names[cc.b]); + } + } + if (has_bias) + { + if (has_ac || has_bc || has_d) + out = format_to(out, cc.bias == TevBias::AddHalf ? " + .5" : " - .5"); + else + out = format_to(out, cc.bias == TevBias::AddHalf ?
".5" : "-.5"); + } + else + { + // If nothing has been written so far, add a zero + if (!(has_ac || has_bc || has_d)) + out = format_to(out, "0"); + } + if (has_scale) + out = format_to(out, ") * {:n}", cc.scale); + out = format_to(out, "\n\n"); + } return format_to(ctx.out(), "a: {}\n" "b: {}\n" "c: {}\n" @@ -512,7 +600,80 @@ struct fmt::formatter template auto format(const TevStageCombiner::AlphaCombiner& ac, FormatContext& ctx) { - return format_to(ctx.out(), + auto out = ctx.out(); + if (ac.bias != TevBias::Compare) + { + // Generate an equation view, simplifying out addition of zero and multiplication by 1 + // dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale + // or equivalently and more readably when the terms are not constants: + // dest = (d (OP) lerp(a, b, c) + bias) * scale + // Note that lerping is more complex than the first form shows; see PixelShaderGen's + // WriteTevRegular for more details. + + // We don't need an alt_names map here, unlike the color combiner, as the only special term is + // Zero, and we filter that out below. However, we do need to append ".a" to all + // parameters, to make it explicit that these are operations on the alpha term instead of the + // 4-element vector. We also need to use the :n specifier so that the numeric ID isn't shown. + + const bool has_d = ac.d != TevAlphaArg::Zero; + // There is no c value for alpha that results in (1 - c) always being zero + const bool has_ac = ac.a != TevAlphaArg::Zero; + // If either b or c is zero, b*c is zero + const bool has_bc = ac.b != TevAlphaArg::Zero && ac.c != TevAlphaArg::Zero; + const bool has_bias = ac.bias != TevBias::Zero; // != Compare is already known + const bool has_scale = ac.scale != TevScale::Scale1; + + const char op = (ac.op == TevOp::Sub ?
'-' : '+'); + + if (ac.dest == TevOutput::Prev) + out = format_to(out, "dest.a = "); + else + out = format_to(out, "{:n}.a = ", ac.dest); + + if (has_scale) + out = format_to(out, "("); + if (has_d) + out = format_to(out, "{:n}.a", ac.d); + if (has_ac || has_bc) + { + if (has_d) + out = format_to(out, " {} ", op); + else if (ac.op == TevOp::Sub) + out = format_to(out, "{}", op); + if (has_ac && has_bc) + { + out = format_to(out, "lerp({:n}.a, {:n}.a, {:n}.a)", ac.a, ac.b, ac.c); + } + else if (has_ac) + { + if (ac.c == TevAlphaArg::Zero) + out = format_to(out, "{:n}.a", ac.a); + else + out = format_to(out, "(1 - {:n}.a)*{:n}.a", ac.c, ac.a); + } + else // has_bc + { + out = format_to(out, "{:n}.a*{:n}.a", ac.c, ac.b); + } + } + if (has_bias) + { + if (has_ac || has_bc || has_d) + out = format_to(out, ac.bias == TevBias::AddHalf ? " + .5" : " - .5"); + else + out = format_to(out, ac.bias == TevBias::AddHalf ? ".5" : "-.5"); + } + else + { + // If nothing has been written so far, add a zero + if (!(has_ac || has_bc || has_d)) + out = format_to(out, "0"); + } + if (has_scale) + out = format_to(out, ") * {:n}", ac.scale); + out = format_to(out, "\n\n"); + } + return format_to(out, "a: {}\n" "b: {}\n" "c: {}\n" diff --git a/Source/UnitTests/Common/EnumFormatterTest.cpp b/Source/UnitTests/Common/EnumFormatterTest.cpp index 55e03152ef..793328b0dd 100644 --- a/Source/UnitTests/Common/EnumFormatterTest.cpp +++ b/Source/UnitTests/Common/EnumFormatterTest.cpp @@ -46,6 +46,12 @@ TEST(EnumUtil, Enum1) EXPECT_EQ(fmt::format("{:s}", Enum1::C), "0x2u /* C */"); EXPECT_EQ(fmt::format("{:s}", static_cast(3)), "0x3u /* Invalid */"); EXPECT_EQ(fmt::format("{:s}", static_cast(4)), "0x4u /* Invalid */"); + + EXPECT_EQ(fmt::format("{:n}", Enum1::A), "A"); + EXPECT_EQ(fmt::format("{:n}", Enum1::B), "B"); + EXPECT_EQ(fmt::format("{:n}", Enum1::C), "C"); + EXPECT_EQ(fmt::format("{:n}", static_cast(3)), "Invalid (3)"); + EXPECT_EQ(fmt::format("{:n}", static_cast(4)), "Invalid (4)"); } 
TEST(EnumUtil, Enum2) @@ -63,4 +69,11 @@ TEST(EnumUtil, Enum2) EXPECT_EQ(fmt::format("{:s}", Enum2::F), "0x3u /* F */"); EXPECT_EQ(fmt::format("{:s}", static_cast(4)), "0x4u /* Invalid */"); EXPECT_EQ(fmt::format("{:s}", static_cast(-1)), "0xffffffffu /* Invalid */"); + + EXPECT_EQ(fmt::format("{:n}", Enum2::D), "D"); + EXPECT_EQ(fmt::format("{:n}", Enum2::E), "E"); + EXPECT_EQ(fmt::format("{:n}", static_cast(2)), "Invalid (2)"); + EXPECT_EQ(fmt::format("{:n}", Enum2::F), "F"); + EXPECT_EQ(fmt::format("{:n}", static_cast(4)), "Invalid (4)"); + EXPECT_EQ(fmt::format("{:n}", static_cast(-1)), "Invalid (-1)"); } From 1a964891f83ef9332c425dd69fe2f277b8ceb992 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 20 Jun 2021 12:49:24 -0700 Subject: [PATCH 17/23] VertexLoader_Color: Use Common::swap24 --- Source/Core/VideoCommon/VertexLoader_Color.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 36939557b8..b41272cbf1 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -125,8 +125,8 @@ void Color_ReadIndex_24b_6666(VertexLoader* loader) { const auto index = DataRead(); const u8* data = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] + - (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]) - 1; - const u32 val = Common::swap32(data); + (index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]); + const u32 val = Common::swap24(data); SetCol6666(loader, val); } @@ -167,7 +167,7 @@ void Color_ReadDirect_16b_4444(VertexLoader* loader) void Color_ReadDirect_24b_6666(VertexLoader* loader) { - SetCol6666(loader, Common::swap32(DataGetPosition() - 1)); + SetCol6666(loader, Common::swap24(DataGetPosition())); DataSkip(3); } From 27cb7044669c62922be6f68913007d2567ce2480 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 
26 Jun 2021 12:48:28 -0700 Subject: [PATCH 18/23] Eliminate VarType for ComponentFormat --- .../D3D/D3DNativeVertexFormat.cpp | 118 +++++++++++------- .../VideoBackends/D3D12/DX12VertexFormat.cpp | 50 ++++---- .../OGL/OGLNativeVertexFormat.cpp | 8 +- .../VideoBackends/Software/SWVertexLoader.cpp | 14 +-- .../VideoBackends/Vulkan/VKVertexFormat.cpp | 50 ++++---- .../Core/VideoCommon/FramebufferManager.cpp | 4 +- Source/Core/VideoCommon/NativeVertexFormat.h | 12 +- Source/Core/VideoCommon/RenderBase.cpp | 6 +- Source/Core/VideoCommon/ShaderCache.cpp | 2 +- Source/Core/VideoCommon/VertexLoader.cpp | 10 +- Source/Core/VideoCommon/VertexLoaderARM64.cpp | 12 +- .../Core/VideoCommon/VertexLoaderManager.cpp | 13 +- Source/Core/VideoCommon/VertexLoaderX64.cpp | 8 +- 13 files changed, 166 insertions(+), 141 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp index 80a59b57b3..8f7d9cad12 100644 --- a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp @@ -3,6 +3,8 @@ #include +#include "Common/EnumMap.h" + #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DRender.h" #include "VideoBackends/D3D/D3DState.h" @@ -20,55 +22,75 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) return std::make_unique(vtx_decl); } -static const DXGI_FORMAT d3d_format_lookup[5 * 4 * 2] = { - // float formats - DXGI_FORMAT_R8_UNORM, - DXGI_FORMAT_R8_SNORM, - DXGI_FORMAT_R16_UNORM, - DXGI_FORMAT_R16_SNORM, - DXGI_FORMAT_R32_FLOAT, - DXGI_FORMAT_R8G8_UNORM, - DXGI_FORMAT_R8G8_SNORM, - DXGI_FORMAT_R16G16_UNORM, - DXGI_FORMAT_R16G16_SNORM, - DXGI_FORMAT_R32G32_FLOAT, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_SNORM, - DXGI_FORMAT_R16G16B16A16_UNORM, - 
DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R32G32B32A32_FLOAT, - - // integer formats - DXGI_FORMAT_R8_UINT, - DXGI_FORMAT_R8_SINT, - DXGI_FORMAT_R16_UINT, - DXGI_FORMAT_R16_SINT, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_R8G8_UINT, - DXGI_FORMAT_R8G8_SINT, - DXGI_FORMAT_R16G16_UINT, - DXGI_FORMAT_R16G16_SINT, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_R8G8B8A8_UINT, - DXGI_FORMAT_R8G8B8A8_SINT, - DXGI_FORMAT_R16G16B16A16_UINT, - DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_UNKNOWN, -}; - -DXGI_FORMAT VarToD3D(VarType t, int size, bool integer) +DXGI_FORMAT VarToD3D(ComponentFormat t, int size, bool integer) { - DXGI_FORMAT retval = d3d_format_lookup[(int)t + 5 * (size - 1) + 5 * 4 * (int)integer]; + using FormatMap = Common::EnumMap; + static constexpr auto f = [](FormatMap a) { return a; }; // Deduction helper + + static constexpr std::array d3d_float_format_lookup = { + f({ + DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R8_SNORM, + DXGI_FORMAT_R16_UNORM, + DXGI_FORMAT_R16_SNORM, + DXGI_FORMAT_R32_FLOAT, + }), + f({ + DXGI_FORMAT_R8G8_UNORM, + DXGI_FORMAT_R8G8_SNORM, + DXGI_FORMAT_R16G16_UNORM, + DXGI_FORMAT_R16G16_SNORM, + DXGI_FORMAT_R32G32_FLOAT, + }), + f({ + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_R32G32B32_FLOAT, + }), + f({ + DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_SNORM, + DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R32G32B32A32_FLOAT, + }), + }; + + static constexpr std::array d3d_integer_format_lookup = { + f({ + DXGI_FORMAT_R8_UINT, + DXGI_FORMAT_R8_SINT, + DXGI_FORMAT_R16_UINT, + DXGI_FORMAT_R16_SINT, + DXGI_FORMAT_UNKNOWN, + }), + f({ + DXGI_FORMAT_R8G8_UINT, + DXGI_FORMAT_R8G8_SINT, + DXGI_FORMAT_R16G16_UINT, + DXGI_FORMAT_R16G16_SINT, + DXGI_FORMAT_UNKNOWN, + }), + f({ + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + 
DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + }), + f({ + DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_UNKNOWN, + }), + }; + + DXGI_FORMAT retval = + integer ? d3d_integer_format_lookup[size - 1][t] : d3d_float_format_lookup[size - 1][t]; if (retval == DXGI_FORMAT_UNKNOWN) { PanicAlertFmt("VarToD3D: Invalid type/size combo {}, {}, {}", t, size, integer); diff --git a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp index d95c8b9f7f..bd818d1a66 100644 --- a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp @@ -4,39 +4,43 @@ #include "VideoBackends/D3D12/DX12VertexFormat.h" #include "Common/Assert.h" +#include "Common/EnumMap.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderGen.h" namespace DX12 { -static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer) +static DXGI_FORMAT VarToDXGIFormat(ComponentFormat t, u32 components, bool integer) { + using ComponentArray = std::array; + static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper + // NOTE: 3-component formats are not valid. 
- static const DXGI_FORMAT float_type_lookup[][4] = { - {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE - {DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM, - DXGI_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE - {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, - DXGI_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT - {DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM, - DXGI_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT + static constexpr Common::EnumMap float_type_lookup = { + f({DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_UNORM}), // UByte + f({DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM, + DXGI_FORMAT_R8G8B8A8_SNORM}), // Byte + f({DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, + DXGI_FORMAT_R16G16B16A16_UNORM}), // UShort + f({DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM, + DXGI_FORMAT_R16G16B16A16_SNORM}), // Short + f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float }; - static const DXGI_FORMAT integer_type_lookup[][4] = { - {DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT, - DXGI_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE - {DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT, - DXGI_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE - {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT, - DXGI_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT - {DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT, - DXGI_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT - {DXGI_FORMAT_R32_FLOAT, 
DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, - DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT + static constexpr Common::EnumMap integer_type_lookup = { + f({DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT, + DXGI_FORMAT_R8G8B8A8_UINT}), // UByte + f({DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT, + DXGI_FORMAT_R8G8B8A8_SINT}), // Byte + f({DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT, + DXGI_FORMAT_R16G16B16A16_UINT}), // UShort + f({DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT, + DXGI_FORMAT_R16G16B16A16_SINT}), // Short + f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float }; ASSERT(components > 0 && components <= 4); diff --git a/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp b/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp index 9a6e568c8d..d21a40ebc1 100644 --- a/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "Common/CommonTypes.h" +#include "Common/EnumMap.h" #include "Common/GL/GLUtil.h" #include "Common/MsgHandler.h" @@ -23,10 +24,11 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) return std::make_unique(vtx_decl); } -static inline GLuint VarToGL(VarType t) +static inline GLuint VarToGL(ComponentFormat t) { - static const GLuint lookup[5] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, - GL_FLOAT}; + static constexpr Common::EnumMap lookup = { + GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, GL_FLOAT, + }; return lookup[t]; } diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index a7d0506a1f..af99e5ba85 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ 
b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -145,7 +145,7 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f if (format.enable) { src.Skip(format.offset); - src.Skip(base_component * (1 << (format.type >> 1))); + src.Skip(base_component * GetElementSize(format.type)); int i; for (i = 0; i < std::min(format.components - base_component, components); i++) @@ -153,24 +153,24 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f int i_dst = reverse ? components - i - 1 : i; switch (format.type) { - case VAR_UNSIGNED_BYTE: + case ComponentFormat::UByte: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_BYTE: + case ComponentFormat::Byte: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_UNSIGNED_SHORT: + case ComponentFormat::UShort: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_SHORT: + case ComponentFormat::Short: dst[i_dst] = ReadNormalized(src.Read()); break; - case VAR_FLOAT: + case ComponentFormat::Float: dst[i_dst] = ReadNormalized(src.Read()); break; } - ASSERT_MSG(VIDEO, !format.integer || format.type != VAR_FLOAT, + ASSERT_MSG(VIDEO, !format.integer || format.type != ComponentFormat::Float, "only non-float values are allowed to be streamed as integer"); } for (; i < components; i++) diff --git a/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp b/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp index e06beafbb8..5f53547066 100644 --- a/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp @@ -4,6 +4,7 @@ #include "VideoBackends/Vulkan/VKVertexFormat.h" #include "Common/Assert.h" +#include "Common/EnumMap.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/ObjectCache.h" @@ -13,32 +14,35 @@ namespace Vulkan { -static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer) +static VkFormat VarToVkFormat(ComponentFormat t, uint32_t components, bool 
integer) { - static const VkFormat float_type_lookup[][4] = { - {VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM, - VK_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE - {VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM, - VK_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE - {VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM, - VK_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT - {VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM, - VK_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT - {VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, - VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT + using ComponentArray = std::array; + static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper + + static constexpr Common::EnumMap float_type_lookup = { + f({VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8A8_UNORM}), // UByte + f({VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM, + VK_FORMAT_R8G8B8A8_SNORM}), // Byte + f({VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM, + VK_FORMAT_R16G16B16A16_UNORM}), // UShort + f({VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16A16_SNORM}), // Short + f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT}), // Float }; - static const VkFormat integer_type_lookup[][4] = { - {VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT, - VK_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE - {VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT, - VK_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE - {VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT, - VK_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT - {VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT, - VK_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT - {VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, 
VK_FORMAT_R32G32B32_SFLOAT, - VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT + static constexpr Common::EnumMap integer_type_lookup = { + f({VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT, + VK_FORMAT_R8G8B8A8_UINT}), // UByte + f({VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT, + VK_FORMAT_R8G8B8A8_SINT}), // Byte + f({VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT, + VK_FORMAT_R16G16B16A16_UINT}), // UShort + f({VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT, + VK_FORMAT_R16G16B16A16_SINT}), // Short + f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT}), // Float }; ASSERT(components > 0 && components <= 4); diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp index 211adda800..43c213f8ba 100644 --- a/Source/Core/VideoCommon/FramebufferManager.cpp +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -837,12 +837,12 @@ bool FramebufferManager::CompilePokePipelines() { PortableVertexDeclaration vtx_decl = {}; vtx_decl.position.enable = true; - vtx_decl.position.type = VAR_FLOAT; + vtx_decl.position.type = ComponentFormat::Float; vtx_decl.position.components = 4; vtx_decl.position.integer = false; vtx_decl.position.offset = offsetof(EFBPokeVertex, position); vtx_decl.colors[0].enable = true; - vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; + vtx_decl.colors[0].type = ComponentFormat::UByte; vtx_decl.colors[0].components = 4; vtx_decl.colors[0].integer = false; vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color); diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index e4aa0f7e61..7bbf0bd38c 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -8,6 +8,7 @@ #include "Common/CommonTypes.h" #include "Common/Hash.h" +#include "VideoCommon/CPMemory.h" // m_components enum 
@@ -45,18 +46,9 @@ enum VB_HAS_UVTEXMTXSHIFT = 13, }; -enum VarType -{ - VAR_UNSIGNED_BYTE, // GX_U8 = 0 - VAR_BYTE, // GX_S8 = 1 - VAR_UNSIGNED_SHORT, // GX_U16 = 2 - VAR_SHORT, // GX_S16 = 3 - VAR_FLOAT, // GX_F32 = 4 -}; - struct AttributeFormat { - VarType type; + ComponentFormat type; int components; int offset; bool enable; diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 16d36f7453..31cfd36aee 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -986,9 +986,9 @@ bool Renderer::InitializeImGui() ImGui::GetStyle().WindowRounding = 7.0f; PortableVertexDeclaration vdecl = {}; - vdecl.position = {VAR_FLOAT, 2, offsetof(ImDrawVert, pos), true, false}; - vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false}; - vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false}; + vdecl.position = {ComponentFormat::Float, 2, offsetof(ImDrawVert, pos), true, false}; + vdecl.texcoords[0] = {ComponentFormat::Float, 2, offsetof(ImDrawVert, uv), true, false}; + vdecl.colors[0] = {ComponentFormat::UByte, 4, offsetof(ImDrawVert, col), true, false}; vdecl.stride = sizeof(ImDrawVert); m_imgui_vertex_format = CreateNativeVertexFormat(vdecl); if (!m_imgui_vertex_format) diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 8b1196cabc..b72dd238ff 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -1095,7 +1095,7 @@ void ShaderCache::QueueUberShaderPipelines() // All attributes will be enabled in GetUberVertexFormat. 
PortableVertexDeclaration dummy_vertex_decl = {}; dummy_vertex_decl.position.components = 4; - dummy_vertex_decl.position.type = VAR_FLOAT; + dummy_vertex_decl.position.type = ComponentFormat::Float; dummy_vertex_decl.position.enable = true; dummy_vertex_decl.stride = sizeof(float) * 4; NativeVertexFormat* dummy_vertex_format = diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index aaf7a477f7..69f669cd4c 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -91,7 +91,7 @@ void VertexLoader::CompileVertexTranslator() m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.offset = nat_offset; - m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.posmtx.type = ComponentFormat::UByte; m_native_vtx_decl.posmtx.integer = true; nat_offset += 4; } @@ -110,7 +110,7 @@ void VertexLoader::CompileVertexTranslator() m_native_vtx_decl.position.components = pos_elements; m_native_vtx_decl.position.enable = true; m_native_vtx_decl.position.offset = nat_offset; - m_native_vtx_decl.position.type = VAR_FLOAT; + m_native_vtx_decl.position.type = ComponentFormat::Float; m_native_vtx_decl.position.integer = false; nat_offset += pos_elements * sizeof(float); @@ -134,7 +134,7 @@ void VertexLoader::CompileVertexTranslator() m_native_vtx_decl.normals[i].components = 3; m_native_vtx_decl.normals[i].enable = true; m_native_vtx_decl.normals[i].offset = nat_offset; - m_native_vtx_decl.normals[i].type = VAR_FLOAT; + m_native_vtx_decl.normals[i].type = ComponentFormat::Float; m_native_vtx_decl.normals[i].integer = false; nat_offset += 12; } @@ -143,7 +143,7 @@ void VertexLoader::CompileVertexTranslator() for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++) { m_native_vtx_decl.colors[i].components = 4; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; 
m_native_vtx_decl.colors[i].integer = false; TPipelineFunction pFunc = @@ -166,7 +166,7 @@ void VertexLoader::CompileVertexTranslator() for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) { m_native_vtx_decl.texcoords[i].offset = nat_offset; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; const auto tc = m_VtxDesc.high.TexCoord[i].Value(); diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index 106f1cfd9e..330deef548 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -221,7 +221,7 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm native_format->components = count_out; native_format->enable = true; native_format->offset = m_dst_ofs; - native_format->type = VAR_FLOAT; + native_format->type = ComponentFormat::Float; native_format->integer = false; m_dst_ofs += sizeof(float) * count_out; @@ -429,7 +429,7 @@ void VertexLoaderARM64::GenerateVertexLoader() m_native_vtx_decl.posmtx.components = 4; m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.offset = m_dst_ofs; - m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.posmtx.type = ComponentFormat::UByte; m_native_vtx_decl.posmtx.integer = true; m_src_ofs += sizeof(u8); m_dst_ofs += sizeof(u32); @@ -493,7 +493,7 @@ void VertexLoaderARM64::GenerateVertexLoader() for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++) { m_native_vtx_decl.colors[i].components = 4; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; m_native_vtx_decl.colors[i].integer = false; if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent) @@ -509,7 +509,7 @@ void VertexLoaderARM64::GenerateVertexLoader() m_native_vtx_decl.colors[i].components = 4; 
m_native_vtx_decl.colors[i].enable = true; m_native_vtx_decl.colors[i].offset = m_dst_ofs; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; m_native_vtx_decl.colors[i].integer = false; m_dst_ofs += 4; } @@ -518,7 +518,7 @@ void VertexLoaderARM64::GenerateVertexLoader() for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++) { m_native_vtx_decl.texcoords[i].offset = m_dst_ofs; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::S ? 1 : 2; @@ -540,7 +540,7 @@ void VertexLoaderARM64::GenerateVertexLoader() { m_native_vtx_decl.texcoords[i].components = 3; m_native_vtx_decl.texcoords[i].enable = true; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index ef1e2f7aee..b0922e5a6e 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -146,7 +146,8 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl) std::memset(&new_decl, 0, sizeof(new_decl)); new_decl.stride = decl.stride; - auto MakeDummyAttribute = [](AttributeFormat& attr, VarType type, int components, bool integer) { + auto MakeDummyAttribute = [](AttributeFormat& attr, ComponentFormat type, int components, + bool integer) { attr.type = type; attr.components = components; attr.offset = 0; @@ -164,32 +165,32 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl) if (decl.position.enable) CopyAttribute(new_decl.position, decl.position); else - 
MakeDummyAttribute(new_decl.position, VAR_FLOAT, 1, false); + MakeDummyAttribute(new_decl.position, ComponentFormat::Float, 1, false); for (size_t i = 0; i < std::size(new_decl.normals); i++) { if (decl.normals[i].enable) CopyAttribute(new_decl.normals[i], decl.normals[i]); else - MakeDummyAttribute(new_decl.normals[i], VAR_FLOAT, 1, false); + MakeDummyAttribute(new_decl.normals[i], ComponentFormat::Float, 1, false); } for (size_t i = 0; i < std::size(new_decl.colors); i++) { if (decl.colors[i].enable) CopyAttribute(new_decl.colors[i], decl.colors[i]); else - MakeDummyAttribute(new_decl.colors[i], VAR_UNSIGNED_BYTE, 4, false); + MakeDummyAttribute(new_decl.colors[i], ComponentFormat::UByte, 4, false); } for (size_t i = 0; i < std::size(new_decl.texcoords); i++) { if (decl.texcoords[i].enable) CopyAttribute(new_decl.texcoords[i], decl.texcoords[i]); else - MakeDummyAttribute(new_decl.texcoords[i], VAR_FLOAT, 1, false); + MakeDummyAttribute(new_decl.texcoords[i], ComponentFormat::Float, 1, false); } if (decl.posmtx.enable) CopyAttribute(new_decl.posmtx, decl.posmtx); else - MakeDummyAttribute(new_decl.posmtx, VAR_UNSIGNED_BYTE, 1, true); + MakeDummyAttribute(new_decl.posmtx, ComponentFormat::UByte, 1, true); return GetOrCreateMatchingFormat(new_decl); } diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index 4022863e59..40ae508219 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -122,7 +122,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com native_format->components = count_out; native_format->enable = true; native_format->offset = m_dst_ofs; - native_format->type = VAR_FLOAT; + native_format->type = ComponentFormat::Float; native_format->integer = false; m_dst_ofs += sizeof(float) * count_out; @@ -421,7 +421,7 @@ void VertexLoaderX64::GenerateVertexLoader() m_native_vtx_decl.posmtx.components = 4; 
m_native_vtx_decl.posmtx.enable = true; m_native_vtx_decl.posmtx.offset = m_dst_ofs; - m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.posmtx.type = ComponentFormat::UByte; m_native_vtx_decl.posmtx.integer = true; m_src_ofs += sizeof(u8); m_dst_ofs += sizeof(u32); @@ -467,7 +467,7 @@ void VertexLoaderX64::GenerateVertexLoader() m_native_vtx_decl.colors[i].components = 4; m_native_vtx_decl.colors[i].enable = true; m_native_vtx_decl.colors[i].offset = m_dst_ofs; - m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE; + m_native_vtx_decl.colors[i].type = ComponentFormat::UByte; m_native_vtx_decl.colors[i].integer = false; m_dst_ofs += 4; } @@ -488,7 +488,7 @@ void VertexLoaderX64::GenerateVertexLoader() { m_native_vtx_decl.texcoords[i].components = 3; m_native_vtx_decl.texcoords[i].enable = true; - m_native_vtx_decl.texcoords[i].type = VAR_FLOAT; + m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float; m_native_vtx_decl.texcoords[i].integer = false; MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i])); if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent) From f0f12ac8d75de2c16a9938079d2ddfa947c3acd4 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Mon, 5 Jul 2021 23:07:20 -0700 Subject: [PATCH 19/23] Fifo analyzer: Decode floats in primitive data --- Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp | 94 +++++++++++++++++++-- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index 4f76f2d263..be9daf57d3 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -613,19 +613,103 @@ public: const auto& vtx_desc = m_cpmem.vtx_desc; const auto& vtx_attr = m_cpmem.vtx_attr[vat]; - const auto component_sizes = VertexLoaderBase::GetVertexComponentSizes(vtx_desc, vtx_attr); u32 i = 0; + const auto process_component = [&](VertexComponentFormat cformat, ComponentFormat format, + u32 
non_indexed_count, u32 indexed_count = 1) { + u32 count; + if (cformat == VertexComponentFormat::NotPresent) + return; + else if (cformat == VertexComponentFormat::Index8) + { + format = ComponentFormat::UByte; + count = indexed_count; + } + else if (cformat == VertexComponentFormat::Index16) + { + format = ComponentFormat::UShort; + count = indexed_count; + } + else + { + count = non_indexed_count; + } + + const u32 component_size = GetElementSize(format); + for (u32 j = 0; j < count; j++) + { + for (u32 component_off = 0; component_off < component_size; component_off++) + { + text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0')); + } + if (format == ComponentFormat::Float) + { + const float value = Common::BitCast(Common::swap32(&vertex_data[i])); + text += QStringLiteral(" (%1)").arg(value); + } + i += component_size; + text += QLatin1Char{' '}; + } + text += QLatin1Char{' '}; + }; + const auto process_simple_component = [&](u32 size) { + for (u32 component_off = 0; component_off < size; component_off++) + { + text += QStringLiteral("%1").arg(vertex_data[i + component_off], 2, 16, QLatin1Char('0')); + } + i += size; + text += QLatin1Char{' '}; + text += QLatin1Char{' '}; + }; + for (u32 vertex_num = 0; vertex_num < num_vertices; vertex_num++) { + ASSERT(i == vertex_num * vertex_size); + text += QLatin1Char{'\n'}; - for (u32 comp_size : component_sizes) + if (vtx_desc.low.PosMatIdx) + process_simple_component(1); + for (auto texmtxidx : vtx_desc.low.TexMatIdx) { - for (u32 comp_off = 0; comp_off < comp_size; comp_off++) + if (texmtxidx) + process_simple_component(1); + } + process_component(vtx_desc.low.Position, vtx_attr.g0.PosFormat, + vtx_attr.g0.PosElements == CoordComponentCount::XY ? 2 : 3); + // TODO: Is this calculation correct? + const u32 normal_component_count = + vtx_desc.low.Normal == VertexComponentFormat::Direct ? 3 : 1; + const u32 normal_elements = vtx_attr.g0.NormalElements == NormalComponentCount::NBT ? 
3 : 1; + process_component(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat, + normal_component_count * normal_elements, + vtx_attr.g0.NormalIndex3 ? normal_elements : 1); + for (u32 c = 0; c < vtx_desc.low.Color.Size(); c++) + { + static constexpr Common::EnumMap component_sizes = { + 2, // RGB565 + 3, // RGB888 + 4, // RGB888x + 2, // RGBA4444 + 3, // RGBA6666 + 4, // RGBA8888 + }; + switch (vtx_desc.low.Color[c]) { - text += QStringLiteral("%1").arg(vertex_data[i++], 2, 16, QLatin1Char('0')); + case VertexComponentFormat::Index8: + process_simple_component(1); + break; + case VertexComponentFormat::Index16: + process_simple_component(2); + break; + case VertexComponentFormat::Direct: + process_simple_component(component_sizes[vtx_attr.GetColorFormat(c)]); + break; } - text += QLatin1Char{' '}; + } + for (u32 t = 0; t < vtx_desc.high.TexCoord.Size(); t++) + { + process_component(vtx_desc.high.TexCoord[t], vtx_attr.GetTexFormat(t), + vtx_attr.GetTexElements(t) == TexComponentCount::ST ? 2 : 1); } } } From d1cc539476b2937e21bfa837f502c6f807ad7354 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Wed, 28 Jul 2021 17:18:05 -0700 Subject: [PATCH 20/23] BPMemory: Correct spelling of MaxAniso --- Source/Core/VideoCommon/BPMemory.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index f5009875e7..098e72d5c0 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -917,14 +917,14 @@ struct fmt::formatter : EnumFormatter formatter() : EnumFormatter({"Edge LOD", "Diagonal LOD"}) {} }; -enum class MaxAnsio +enum class MaxAniso { One = 0, Two = 1, Four = 2, }; template <> -struct fmt::formatter : EnumFormatter +struct fmt::formatter : EnumFormatter { formatter() : EnumFormatter({"1", "2", "4"}) {} }; @@ -938,7 +938,7 @@ union TexMode0 BitField<7, 1, FilterMode> min_filter; BitField<8, 1, LODType> diag_lod; BitField<9, 8, s32> lod_bias; - BitField<19, 2, 
MaxAnsio> max_aniso; + BitField<19, 2, MaxAniso> max_aniso; BitField<21, 1, bool, u32> lod_clamp; u32 hex; }; From f4f4dbbc63a0b41da0216cdd00d635f4faa16bd2 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 5 Sep 2021 18:51:52 -0700 Subject: [PATCH 21/23] Switch to the Play / Record tab when fifo playback stops --- Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp | 14 ++++++++------ Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h | 4 ++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp index 253017a952..c9f9a6ceff 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp @@ -150,18 +150,18 @@ void FIFOPlayerWindow::CreateWidgets() layout->addWidget(recording_group); layout->addWidget(m_button_box); - QWidget* main_widget = new QWidget(this); - main_widget->setLayout(layout); + m_main_widget = new QWidget(this); + m_main_widget->setLayout(layout); - auto* tab_widget = new QTabWidget(this); + m_tab_widget = new QTabWidget(this); m_analyzer = new FIFOAnalyzer; - tab_widget->addTab(main_widget, tr("Play / Record")); - tab_widget->addTab(m_analyzer, tr("Analyze")); + m_tab_widget->addTab(m_main_widget, tr("Play / Record")); + m_tab_widget->addTab(m_analyzer, tr("Analyze")); auto* tab_layout = new QVBoxLayout; - tab_layout->addWidget(tab_widget); + tab_layout->addWidget(m_tab_widget); setLayout(tab_layout); } @@ -250,6 +250,8 @@ void FIFOPlayerWindow::OnEmulationStopped() StopRecording(); UpdateControls(); + // When emulation stops, switch away from the analyzer tab, as it no longer shows anything useful + m_tab_widget->setCurrentWidget(m_main_widget); m_analyzer->Update(); } diff --git a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h index 7f6fbf6f1a..2fe7bce352 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h +++ 
b/Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h @@ -12,6 +12,7 @@ class QDialogButtonBox; class QLabel; class QPushButton; class QSpinBox; +class QTabWidget; class FIFOAnalyzer; class FIFOPlayerWindow : public QWidget @@ -64,6 +65,9 @@ private: QCheckBox* m_early_memory_updates; QDialogButtonBox* m_button_box; + QWidget* m_main_widget; + QTabWidget* m_tab_widget; + FIFOAnalyzer* m_analyzer; Core::State m_emu_state = Core::State::Uninitialized; }; From e7d5f8ad5c1a018b4e0187256cd8f1b8e0b50e58 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 4 Nov 2021 12:11:51 -0700 Subject: [PATCH 22/23] TextureCacheBase: Re-wrap GetTexture comment --- Source/Core/VideoCommon/TextureCacheBase.cpp | 44 +++++++------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index c774b6d2d1..3041e30473 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -1300,42 +1300,30 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur // Search the texture cache for textures by address // // Find all texture cache entries for the current texture address, and decide whether to use one - // of - // them, or to create a new one + // of them, or to create a new one // // In most cases, the fastest way is to use only one texture cache entry for the same address. - // Usually, - // when a texture changes, the old version of the texture is unlikely to be used again. If there - // were - // new cache entries created for normal texture updates, there would be a slowdown due to a huge - // amount - // of unused cache entries. Also thanks to texture pooling, overwriting an existing cache entry is - // faster than creating a new one from scratch. + // Usually, when a texture changes, the old version of the texture is unlikely to be used again. 
+ // If there were new cache entries created for normal texture updates, there would be a slowdown + // due to a huge amount of unused cache entries. Also thanks to texture pooling, overwriting an + // existing cache entry is faster than creating a new one from scratch. // // Some games use the same address for different textures though. If the same cache entry was used - // in - // this case, it would be constantly overwritten, and effectively there wouldn't be any caching - // for - // those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has - // multiple - // sets of fonts on each other stored in a single texture and uses the palette to make different - // characters visible or invisible. In Castlevania 3 some textures are used for 2 different things - // or - // at least in 2 different ways(size 1024x1024 vs 1024x256). + // in this case, it would be constantly overwritten, and effectively there wouldn't be any caching + // for those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has + // multiple sets of fonts on each other stored in a single texture and uses the palette to make + // different characters visible or invisible. In Castlevania 3 some textures are used for 2 + // different things or at least in 2 different ways (size 1024x1024 vs 1024x256). // // To determine whether to use multiple cache entries or a single entry, use the following - // heuristic: - // If the same texture address is used several times during the same frame, assume the address is - // used - // for different purposes and allow creating an additional cache entry. If there's at least one - // entry - // that hasn't been used for the same frame, then overwrite it, in order to keep the cache as - // small as - // possible. If the current texture is found in the cache, use that entry. 
+ // heuristic: If the same texture address is used several times during the same frame, assume the + // address is used for different purposes and allow creating an additional cache entry. If there's + // at least one entry that hasn't been used for the same frame, then overwrite it, in order to + // keep the cache as small as possible. If the current texture is found in the cache, use that + // entry. // // For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else - // it was - // done in vain. + // it was done in vain. auto iter_range = textures_by_address.equal_range(texture_info.GetRawAddress()); TexAddrCache::iterator iter = iter_range.first; TexAddrCache::iterator oldest_entry = iter; From ffa512f5e7d24d9d9e93366bb596b9473d26d61b Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 18 Dec 2021 15:24:14 -0800 Subject: [PATCH 23/23] DolphinTool: Remove direct dependency on core Videocommon also depends on core, which resulted in linking errors (though I'm not sure why). Ideally, dolphintool wouldn't depend on videocommon... but some stuff in core does. --- Source/Core/DolphinTool/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Core/DolphinTool/CMakeLists.txt b/Source/Core/DolphinTool/CMakeLists.txt index 19eb651273..4b209ac98b 100644 --- a/Source/Core/DolphinTool/CMakeLists.txt +++ b/Source/Core/DolphinTool/CMakeLists.txt @@ -12,7 +12,6 @@ set_target_properties(dolphin-tool PROPERTIES OUTPUT_NAME dolphin-tool) target_link_libraries(dolphin-tool PRIVATE - core discio videocommon cpp-optparse