diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 51fc9bb618..59c862f1dd 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -650,6 +650,7 @@ + @@ -1209,6 +1210,7 @@ + diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 110ae5e803..dd23b94da6 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -32,6 +32,7 @@ #include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" +#include "VideoCommon/TMEM.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureDecoder.h" #include "VideoCommon/VertexShaderManager.h" @@ -353,7 +354,7 @@ static void BPWritten(const BPCmd& bp) if (OpcodeDecoder::g_record_fifo_data) FifoRecorder::GetInstance().UseMemory(addr, tlutXferCount, MemoryUpdate::TMEM); - TextureCacheBase::InvalidateAllBindPoints(); + TMEM::InvalidateAll(); return; } @@ -459,8 +460,7 @@ static void BPWritten(const BPCmd& bp) } return; case BPMEM_TEXINVALIDATE: - // TODO: Needs some restructuring in TextureCacheBase. 
- TextureCacheBase::InvalidateAllBindPoints(); + TMEM::Invalidate(bp.newvalue); return; case BPMEM_ZCOMPARE: // Set the Z-Compare and EFB pixel format @@ -568,7 +568,7 @@ static void BPWritten(const BPCmd& bp) if (OpcodeDecoder::g_record_fifo_data) FifoRecorder::GetInstance().UseMemory(src_addr, bytes_read, MemoryUpdate::TMEM); - TextureCacheBase::InvalidateAllBindPoints(); + TMEM::InvalidateAll(); } return; @@ -661,7 +661,7 @@ static void BPWritten(const BPCmd& bp) // ------------------------ case TexUnitAddress::Register::SETMODE0: case TexUnitAddress::Register::SETMODE1: - TextureCacheBase::InvalidateAllBindPoints(); + TMEM::ConfigurationChanged(tex_address, bp.newvalue); return; // -------------------------------------------- @@ -675,7 +675,7 @@ static void BPWritten(const BPCmd& bp) case TexUnitAddress::Register::SETIMAGE1: case TexUnitAddress::Register::SETIMAGE2: case TexUnitAddress::Register::SETIMAGE3: - TextureCacheBase::InvalidateAllBindPoints(); + TMEM::ConfigurationChanged(tex_address, bp.newvalue); return; // ------------------------------- @@ -683,7 +683,7 @@ static void BPWritten(const BPCmd& bp) // BPMEM_TX_SETTLUT - Format, TMEM Offset (offset of TLUT from start of TMEM high bank > > 5) // ------------------------------- case TexUnitAddress::Register::SETTLUT: - TextureCacheBase::InvalidateAllBindPoints(); + TMEM::ConfigurationChanged(tex_address, bp.newvalue); return; case TexUnitAddress::Register::UNKNOWN: break; // Not handled diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index 7890185f97..17bee4ab7a 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -90,6 +90,8 @@ add_library(videocommon TextureDecoder_Util.h TextureInfo.cpp TextureInfo.h + TMEM.cpp + TMEM.h UberShaderCommon.cpp UberShaderCommon.h UberShaderPixel.cpp @@ -170,12 +172,12 @@ if(FFmpeg_FOUND) FFmpeg::swresample FFmpeg::swscale ) - if(APPLE) + if(APPLE) target_link_libraries(videocommon 
PRIVATE ${COREMEDIA_LIBRARY} ${VIDEOTOOLBOX_LIBRARY} ${COREVIDEO_LIBRARY} - ${AUDIOTOOLBOX_LIBRARY} + ${AUDIOTOOLBOX_LIBRARY} ) endif() endif() diff --git a/Source/Core/VideoCommon/TMEM.cpp b/Source/Core/VideoCommon/TMEM.cpp new file mode 100644 index 0000000000..b0cf61b8f2 --- /dev/null +++ b/Source/Core/VideoCommon/TMEM.cpp @@ -0,0 +1,224 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include "VideoCommon/BPMemory.h" +#include "VideoCommon/TMEM.h" + +namespace TMEM +{ +struct TextureUnitState +{ + enum class State + { + // Cache is invalid. Configuration has changed + INVALID, + + // Valid, but not cached due to either being too big, or overlapping with another texture unit + VALID, + + // Texture unit has cached all of the previous draw + CACHED, + }; + + struct BankConfig + { + u32 width = 0; + u32 height = 0; + u32 base = 0; + u32 size = 0; + bool Overlaps(const BankConfig& other) const; + }; + + BankConfig even; + BankConfig odd; + State state; + + bool Overlaps(const TextureUnitState& other) const; +}; + +static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config); + +static std::array s_unit; + +// On TMEM configuration changed: +// 1. invalidate stage. + +void ConfigurationChanged(TexUnitAddress bp_addr, u32 config) +{ + TextureUnitState& unit_state = s_unit[bp_addr.GetUnitID()]; + + // If anything has changed, we can't assume existing state is still valid. 
+ unit_state.state = TextureUnitState::State::INVALID; + + // Note: BPStructs has already filtered out NOP changes before calling us + switch (bp_addr.Reg) + { + case TexUnitAddress::Register::SETIMAGE1: + { + // Image Type and Even bank's Cache Height, Cache Width, TMEM Offset + TexImage1 even = {.hex = config}; + unit_state.even = {even.cache_width, even.cache_height, even.tmem_even << 5, 0}; + break; + } + case TexUnitAddress::Register::SETIMAGE2: + { + // Odd bank's Cache Height, Cache Width, TMEM Offset + TexImage2 odd = {.hex = config}; + unit_state.odd = {odd.cache_width, odd.cache_height, odd.tmem_odd << 5, 0}; + break; + } + default: + // Something else has changed + return; + } +} + +void InvalidateAll() +{ + for (auto& unit : s_unit) + { + unit.state = TextureUnitState::State::INVALID; + } +} + +// On invalidate cache: +// 1. invalidate all texture units. + +void Invalidate([[maybe_unused]] u32 param) +{ + // The exact arguments of Invalidate commands are currently unknown. + // It appears to contain the TMEM address and a size. + + // For simplicity, we will just invalidate everything + InvalidateAll(); +} + +// On bind: +// 1. use mipmapping/32bit status to calculate final sizes +// 2. if texture size is small enough to fit in region mark as cached. +// otherwise, mark as valid + +void Bind(u32 unit, int width, int height, bool is_mipmapped, bool is_32_bit) +{ + TextureUnitState& unit_state = s_unit[unit]; + + // All textures use the even bank. 
+ // It holds the level 0 mipmap (and other even mipmap LODs, if mipmapping is enabled) + unit_state.even.size = CalculateUnitSize(unit_state.even); + + bool fits = (width * height * 32U) <= unit_state.even.size; + + if (is_mipmapped || is_32_bit) + { + // And the odd bank is enabled when either mipmapping is enabled or the texture is 32 bit + // It holds the Alpha and Red channels of 32 bit textures or the odd layers of a mipmapped + // texture + unit_state.odd.size = CalculateUnitSize(unit_state.odd); + + fits = fits && (width * height * 32U) <= unit_state.odd.size; + } + else + { + unit_state.odd.size = 0; + } + + if (is_mipmapped) + { + // TODO: This is what games appear to expect from hardware. But seems odd, as it doesn't line up + // with how much extra memory is required for mipmapping, just 33% more. + // Hardware testing is required to see exactly what gets used. + + // When mipmapping is enabled, the even bank is doubled in size + // The extended region holds the remaining even mipmap layers + unit_state.even.size *= 2; + + if (is_32_bit) + { + // When a 32bit texture is mipmapped, the odd bank is also doubled in size + unit_state.odd.size *= 2; + } + } + + unit_state.state = fits ? TextureUnitState::State::CACHED : TextureUnitState::State::VALID; +} + +static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config) +{ + u32 width = bank_config.width; + u32 height = bank_config.height; + + // These are the only cache sizes supported by the sdk + if (width == height) + { + switch (width) + { + case 3: // 32KB + return 32 * 1024; + case 4: // 128KB + return 128 * 1024; + case 5: // 512KB + return 512 * 1024; + default: + break; + } + } + + // However, the registers allow a much larger amount of configurability. + // Maybe other sizes are broken? 
+ // Until hardware tests are done, this is a guess at the size algorithm + + return 512 * (1 << width) * (1 << height); +} + +bool TextureUnitState::BankConfig::Overlaps(const BankConfig& other) const +{ + if (size == 0 || other.size == 0) + return false; + return (base <= other.base && (base + size) > other.base) || + (other.base <= base && (other.base + other.size) > base); +} + +bool TextureUnitState::Overlaps(const TextureUnitState& other) const +{ + if (state == TextureUnitState::State::INVALID || other.state == TextureUnitState::State::INVALID) + return false; + return even.Overlaps(other.even) || even.Overlaps(other.odd) || odd.Overlaps(other.even) || + odd.Overlaps(other.odd); +} + +// Scans through active texture units and checks for overlaps. +void FinalizeBinds(BitSet32 used_textures) +{ + for (u32 i : used_textures) + { + if (s_unit[i].even.Overlaps(s_unit[i].odd)) + { // Self-overlap + s_unit[i].state = TextureUnitState::State::VALID; + } + for (size_t j = 0; j < s_unit.size(); j++) + { + if (j != i && s_unit[i].Overlaps(s_unit[j])) + { + // There is an overlap, downgrade both from CACHED + // (for there to be an overlap, both must have started as valid or cached) + s_unit[i].state = TextureUnitState::State::VALID; + s_unit[j].state = TextureUnitState::State::VALID; + } + } + } +} + +bool IsCached(u32 unit) +{ + return s_unit[unit].state == TextureUnitState::State::CACHED; +} + +bool IsValid(u32 unit) +{ + return s_unit[unit].state != TextureUnitState::State::INVALID; +} + +} // namespace TMEM diff --git a/Source/Core/VideoCommon/TMEM.h b/Source/Core/VideoCommon/TMEM.h new file mode 100644 index 0000000000..26f7d12f0a --- /dev/null +++ b/Source/Core/VideoCommon/TMEM.h @@ -0,0 +1,21 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "Common/BitSet.h" +#include "Common/CommonTypes.h" + +#include "VideoCommon/BPMemory.h" + +namespace TMEM +{ +void InvalidateAll(); +void Invalidate(u32 param); +void ConfigurationChanged(TexUnitAddress bp_addr, u32 config); +void Bind(u32 unit, int num_blocks_width, int num_blocks_height, bool is_mipmapped, bool is_32_bit); +void FinalizeBinds(BitSet32 used_textures); +bool IsCached(u32 unit); +bool IsValid(u32 unit); +} // namespace TMEM diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index e113f15832..9a01b57007 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -43,6 +43,7 @@ #include "VideoCommon/SamplerCommon.h" #include "VideoCommon/ShaderCache.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/TMEM.h" #include "VideoCommon/TextureConversionShader.h" #include "VideoCommon/TextureConverterShaderGen.h" #include "VideoCommon/TextureDecoder.h" @@ -57,8 +58,6 @@ static const int TEXTURE_POOL_KILL_THRESHOLD = 3; std::unique_ptr g_texture_cache; -std::bitset<8> TextureCacheBase::valid_bind_points; - TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr tex, std::unique_ptr fb) : texture(std::move(tex)), framebuffer(std::move(fb)) @@ -95,7 +94,7 @@ TextureCacheBase::TextureCacheBase() Common::SetHash64Function(); - InvalidateAllBindPoints(); + TMEM::InvalidateAll(); } TextureCacheBase::~TextureCacheBase() @@ -123,7 +122,7 @@ bool TextureCacheBase::Initialize() void TextureCacheBase::Invalidate() { FlushEFBCopies(); - InvalidateAllBindPoints(); + TMEM::InvalidateAll(); bound_textures.fill(nullptr); for (auto& tex : textures_by_address) @@ -1026,12 +1025,12 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, g_renderer->SetSamplerState(index, state); } -void TextureCacheBase::BindTextures() +void TextureCacheBase::BindTextures(BitSet32 used_textures) { for (u32 i = 0; i < 
bound_textures.size(); i++) { const TCacheEntry* tentry = bound_textures[i]; - if (IsValidBindPoint(i) && tentry) + if (used_textures[i] && tentry) { g_renderer->SetTexture(i, tentry->texture.get()); PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height); @@ -1040,6 +1039,8 @@ void TextureCacheBase::BindTextures() SetSamplerState(i, custom_tex_scale, tentry->is_custom_tex, tentry->has_arbitrary_mips); } } + + TMEM::FinalizeBinds(used_textures); } class ArbitraryMipmapDetector @@ -1190,9 +1191,22 @@ private: TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) { // if this stage was not invalidated by changes to texture registers, keep the current texture - if (IsValidBindPoint(stage) && bound_textures[stage]) + if (TMEM::IsValid(stage) && bound_textures[stage]) { - return bound_textures[stage]; + TCacheEntry* entry = bound_textures[stage]; + // If the TMEM configuration is such that this texture is more or less guaranteed to still + // be in TMEM, then we know we can reuse the old entry without even hashing the memory + if (TMEM::IsCached(stage)) + { + return entry; + } + + // Otherwise, hash the backing memory and check it's unchanged. + // FIXME: this doesn't correctly handle textures from tmem. + if (!entry->tmem_only && entry->base_hash == entry->CalculateHash()) + { + return entry; + } } TextureInfo texture_info = TextureInfo::FromStage(stage); @@ -1207,7 +1221,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) // We need to keep track of invalided textures until they have actually been replaced or // re-loaded - valid_bind_points.set(stage); + TMEM::Bind(stage, entry->NumBlocksX(), entry->NumBlocksY(), entry->GetNumLevels() > 1, + entry->format == TextureFormat::RGBA8); return entry; } @@ -1510,7 +1525,7 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur const u32 texLevels = hires_tex ? 
(u32)hires_tex->m_levels.size() : texture_info.GetLevelCount(); // We can decode on the GPU if it is a supported format and the flag is enabled. - // Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both + // Currently we don't decode RGBA8 textures from TMEM, as that would require copying from both // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since // there's no conversion between formats. In the future this could be extended with a separate // shader, however. @@ -2537,10 +2552,10 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe for (size_t i = 0; i < bound_textures.size(); ++i) { - // If the entry is currently bound and not invalidated, keep it, but mark it as invalidated. - // This way it can still be used via tmem cache emulation, but nothing else. + // If the entry is currently bound and tmem has it recorded as cached, keep it, but mark it as + // invalidated. This way it can still be used via tmem cache emulation, but nothing else. // Spyro: A Hero's Tail is known for using such overwritten textures. - if (bound_textures[i] == entry && IsValidBindPoint(static_cast(i))) + if (bound_textures[i] == entry && TMEM::IsCached(static_cast(i))) { bound_textures[i]->tmem_only = true; return ++iter; @@ -2815,18 +2830,21 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con } u32 TextureCacheBase::TCacheEntry::BytesPerRow() const +{ + // RGBA takes two cache lines per block; all others take one + const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 
64 : 32; + + return NumBlocksX() * bytes_per_block; +} + +u32 TextureCacheBase::TCacheEntry::NumBlocksX() const { const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt); // Round up source height to multiple of block size const u32 actualWidth = Common::AlignUp(native_width, blockW); - const u32 numBlocksX = actualWidth / blockW; - - // RGBA takes two cache lines per block; all others take one - const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 64 : 32; - - return numBlocksX * bytes_per_block; + return actualWidth / blockW; } u32 TextureCacheBase::TCacheEntry::NumBlocksY() const @@ -2883,6 +2901,8 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const { const u32 bytes_per_row = BytesPerRow(); const u32 hash_sample_size = HashSampleSize(); + + // FIXME: textures from tmem won't get the correct hash. u8* ptr = Memory::GetPointer(addr); if (memory_stride == bytes_per_row) { diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 0d456bfddf..12db848aa3 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -14,6 +14,7 @@ #include #include +#include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" #include "VideoCommon/AbstractTexture.h" @@ -175,6 +176,7 @@ public: bool IsEfbCopy() const { return is_efb_copy; } bool IsCopy() const { return is_xfb_copy || is_efb_copy; } + u32 NumBlocksX() const; u32 NumBlocksY() const; u32 BytesPerRow() const; @@ -214,13 +216,11 @@ public: void Invalidate(); TCacheEntry* Load(const u32 stage); - static void InvalidateAllBindPoints() { valid_bind_points.reset(); } - static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); } TCacheEntry* GetTexture(const int textureCacheSafetyColorSampleSize, TextureInfo& texture_info); TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, MathUtil::Rectangle* display_rect); - virtual void BindTextures(); + 
virtual void BindTextures(BitSet32 used_textures); void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy, const MathUtil::Rectangle& srcRect, bool isIntensity, diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 73ab9af3c2..60bfa1b1d1 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -350,7 +350,7 @@ void VertexManagerBase::LoadTextures() for (unsigned int i : usedtextures) g_texture_cache->Load(i); - g_texture_cache->BindTextures(); + g_texture_cache->BindTextures(usedtextures); } void VertexManagerBase::Flush()