From 53663c00b931c675199ab9f0eb0d0af18083995d Mon Sep 17 00:00:00 2001 From: mimimi085181 Date: Thu, 29 Jun 2017 23:09:32 +0200 Subject: [PATCH] Implement minimal emulation of TMEM caching This is a remake of https://github.com/dolphin-emu/dolphin/pull/3749 Full credit goes to phire. Old message: "If none of the texture registers have changed and TMEM hasn't been invalidated or changed in other ways, we can blindly reuse the old texture cache entries without rehashing. Not only does this fix the bloom effect in Spyro: A Hero's Tail (The game abused texture cache) but it will also provide speedups for other games which use the same texture over multiple draw calls, especially when safe texture cache is in use." Changed the PR per phire's instructions to only return the current texture(s) if none of the texture registers were changed. If any texture register was changed, fall back to the default hashing and rebuilding textures from memory. --- Source/Core/VideoCommon/BPStructs.cpp | 9 ++++ Source/Core/VideoCommon/TextureCacheBase.cpp | 54 +++++++++++++++---- Source/Core/VideoCommon/TextureCacheBase.h | 8 ++- Source/Core/VideoCommon/VertexManagerBase.cpp | 1 - 4 files changed, 60 insertions(+), 12 deletions(-) diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index cd6f84f446..204c637b94 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -285,6 +285,8 @@ static void BPWritten(const BPCmd& bp) if (g_bRecordFifoData) FifoRecorder::GetInstance().UseMemory(addr, tlutXferCount, MemoryUpdate::TMEM); + TextureCacheBase::InvalidateAllBindPoints(); + return; } case BPMEM_FOGRANGE: // Fog Settings Control @@ -397,6 +399,7 @@ static void BPWritten(const BPCmd& bp) return; case BPMEM_TEXINVALIDATE: // TODO: Needs some restructuring in TextureCacheBase. 
+ TextureCacheBase::InvalidateAllBindPoints(); return; case BPMEM_ZCOMPARE: // Set the Z-Compare and EFB pixel format @@ -499,6 +502,8 @@ static void BPWritten(const BPCmd& bp) if (g_bRecordFifoData) FifoRecorder::GetInstance().UseMemory(src_addr, bytes_read, MemoryUpdate::TMEM); + + TextureCacheBase::InvalidateAllBindPoints(); } return; @@ -582,10 +587,12 @@ static void BPWritten(const BPCmd& bp) // ------------------------ case BPMEM_TX_SETMODE0: // (0x90 for linear) case BPMEM_TX_SETMODE0_4: + TextureCacheBase::InvalidateAllBindPoints(); return; case BPMEM_TX_SETMODE1: case BPMEM_TX_SETMODE1_4: + TextureCacheBase::InvalidateAllBindPoints(); return; // -------------------------------------------- // BPMEM_TX_SETIMAGE0 - Texture width, height, format @@ -602,6 +609,7 @@ static void BPWritten(const BPCmd& bp) case BPMEM_TX_SETIMAGE2_4: case BPMEM_TX_SETIMAGE3: case BPMEM_TX_SETIMAGE3_4: + TextureCacheBase::InvalidateAllBindPoints(); return; // ------------------------------- // Set a TLUT @@ -609,6 +617,7 @@ static void BPWritten(const BPCmd& bp) // ------------------------------- case BPMEM_TX_SETTLUT: case BPMEM_TX_SETTLUT_4: + TextureCacheBase::InvalidateAllBindPoints(); return; default: diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index c61fa1fe5e..f0733c7ea0 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -42,6 +42,8 @@ static const int TEXTURE_POOL_KILL_THRESHOLD = 3; std::unique_ptr g_texture_cache; +std::bitset<8> TextureCacheBase::valid_bind_points; + TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr tex) : texture(std::move(tex)) { @@ -76,11 +78,17 @@ TextureCacheBase::TextureCacheBase() HiresTexture::Init(); SetHash64Function(); + + InvalidateAllBindPoints(); } void TextureCacheBase::Invalidate() { - UnbindTextures(); + InvalidateAllBindPoints(); + for (size_t i = 0; i < bound_textures.size(); ++i) + { + bound_textures[i] = 
nullptr; + } for (auto& tex : textures_by_address) { @@ -138,7 +146,11 @@ void TextureCacheBase::Cleanup(int _frameCount) TexAddrCache::iterator tcend = textures_by_address.end(); while (iter != tcend) { - if (iter->second->frameCount == FRAMECOUNT_INVALID) + if (iter->second->tmem_only) + { + iter = InvalidateTexture(iter); + } + else if (iter->second->frameCount == FRAMECOUNT_INVALID) { iter->second->frameCount = _frameCount; ++iter; @@ -307,7 +319,7 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale while (iter.first != iter.second) { TCacheEntry* entry = iter.first->second; - if (entry != entry_to_update && entry->IsEfbCopy() && + if (entry != entry_to_update && entry->IsEfbCopy() && !entry->tmem_only && entry->references.count(entry_to_update) == 0 && entry->OverlapsMemoryRange(entry_to_update->addr, entry_to_update->size_in_bytes) && entry->memory_stride == numBlocksX * block_size) @@ -450,6 +462,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ReturnEntry(unsigned int stage, GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true); + // We need to keep track of invalidated textures until they have actually been replaced or re-loaded + valid_bind_points.set(stage); + return entry; } @@ -457,18 +472,19 @@ void TextureCacheBase::BindTextures() { for (size_t i = 0; i < bound_textures.size(); ++i) { - if (bound_textures[i]) + if (IsValidBindPoint(static_cast(i)) && bound_textures[i]) bound_textures[i]->texture->Bind(static_cast(i)); } } -void TextureCacheBase::UnbindTextures() -{ - bound_textures.fill(nullptr); -} - TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) { + // if this stage was not invalidated by changes to texture registers, keep the current texture + if (IsValidBindPoint(stage) && bound_textures[stage]) + { + return ReturnEntry(stage, bound_textures[stage]); + } + const FourTexUnits& tex = bpmem.tex[stage >> 2]; const u32 id = stage & 3; const u32 address = (tex.texImage3[id].image_base /* & 
0x1FFFFF*/) << 5; @@ -610,6 +626,14 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) while (iter != iter_range.second) { TCacheEntry* entry = iter->second; + + // Skip entries that are only left in our texture cache for the tmem cache emulation + if (entry->tmem_only) + { + ++iter; + continue; + } + // Do not load strided EFB copies, they are not meant to be used directly if (entry->IsEfbCopy() && entry->native_width == nativeW && entry->native_height == nativeH && entry->memory_stride == entry->BytesPerRow()) @@ -1466,6 +1490,18 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter) entry->textures_by_hash_iter = textures_by_hash.end(); } + for (size_t i = 0; i < bound_textures.size(); ++i) + { + // If the entry is currently bound and not invalidated, keep it, but mark it as invalidated. + // This way it can still be used via tmem cache emulation, but nothing else. + // Spyro: A Hero's Tail is known for using such overwritten textures. + if (bound_textures[i] == entry && IsValidBindPoint(static_cast(i))) + { + bound_textures[i]->tmem_only = true; + return ++iter; + } + } + auto config = entry->texture->GetConfig(); texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture))); diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 23f25f48fc..56bf6fb849 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -39,6 +40,7 @@ public: bool is_efb_copy; bool is_custom_tex; bool may_have_overlapping_textures = true; + bool tmem_only = false; // indicates that this texture only exists in the tmem cache unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view @@ -125,8 +127,9 @@ public: virtual void DeleteShaders() = 0; TCacheEntry* Load(const u32 stage); - void UnbindTextures(); - virtual void BindTextures(); 
+ static void InvalidateAllBindPoints() { valid_bind_points.reset(); } + static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); } + void BindTextures(); void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); @@ -158,6 +161,7 @@ protected: size_t temp_size = 0; std::array bound_textures{}; + static std::bitset<8> valid_bind_points; private: // Minimal version of TCacheEntry just for TexPool diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index b08f311c0a..b6088ab2e4 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -243,7 +243,6 @@ void VertexManagerBase::Flush() if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; - g_texture_cache->UnbindTextures(); for (unsigned int i : usedtextures) { const auto* tentry = g_texture_cache->Load(i);