diff --git a/Source/Core/Common/Src/Hash.cpp b/Source/Core/Common/Src/Hash.cpp index 59cd0bb15d..22413f1bcd 100644 --- a/Source/Core/Common/Src/Hash.cpp +++ b/Source/Core/Common/Src/Hash.cpp @@ -21,6 +21,8 @@ #include #endif +static u64 (*ptrHashFunction)(const u8 *src, int len, u32 samples) = &GetMurmurHash3; + // uint32_t // WARNING - may read one more byte! // Implementation from Wikipedia. @@ -108,7 +110,128 @@ u32 HashEctor(const u8* ptr, int length) return(crc); } + #ifdef _M_X64 + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +inline u64 getblock(const u64 * p, int i) +{ + return p[i]; +} + +//---------- +// Block mix - combine the key bits with the hash bits and scramble everything + +inline void bmix64(u64 & h1, u64 & h2, u64 & k1, u64 & k2, u64 & c1, u64 & c2) +{ + k1 *= c1; + k1 = _rotl64(k1,23); + k1 *= c2; + h1 ^= k1; + h1 += h2; + + h2 = _rotl64(h2,41); + + k2 *= c2; + k2 = _rotl64(k2,23); + k2 *= c1; + h2 ^= k2; + h2 += h1; + + h1 = h1*3+0x52dce729; + h2 = h2*3+0x38495ab5; + + c1 = c1*5+0x7b7d159c; + c2 = c2*5+0x6bce6396; +} + +//---------- +// Finalization mix - avalanches all bits to within 0.05% bias + +inline u64 fmix64(u64 k) +{ + k ^= k >> 33; + k *= 0xff51afd7ed558ccd; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53; + k ^= k >> 33; + + return k; +} + +u64 GetMurmurHash3(const u8 *src, const int len, u32 samples) +{ + const u8 * data = (const u8*)src; + const int nblocks = len / 16; + + u64 h1 = 0x9368e53c2f6af274; + u64 h2 = 0x586dcd208f7cd3fd; + + u64 c1 = 0x87c37b91114253d5; + u64 c2 = 0x4cf5ad432745937f; + + //---------- + // body + + const u64 * blocks = (const u64 *)(data); + + for(int i = 0; i < nblocks; i++) + { + u64 k1 = getblock(blocks,i*2+0); + u64 k2 = getblock(blocks,i*2+1); + + bmix64(h1,h2,k1,k2,c1,c2); + } + + //---------- + // tail + + const u8 * tail = (const u8*)(data + nblocks*16); + + u64 k1 = 0; + u64 k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= u64(tail[14]) << 48; + case 14: k2 ^= u64(tail[13]) << 40; + case 13: k2 ^= u64(tail[12]) << 32; + case 12: k2 ^= u64(tail[11]) << 24; + case 11: k2 ^= u64(tail[10]) << 16; + case 10: k2 ^= u64(tail[ 9]) << 8; + case 9: k2 ^= u64(tail[ 8]) << 0; + + case 8: k1 ^= u64(tail[ 7]) << 56; + case 7: k1 ^= u64(tail[ 6]) << 48; + case 6: k1 ^= u64(tail[ 5]) << 40; + case 5: k1 ^= u64(tail[ 4]) << 32; + case 4: k1 ^= u64(tail[ 3]) << 24; + case 3: k1 ^= u64(tail[ 2]) << 16; + case 2: k1 ^= u64(tail[ 1]) << 8; + case 1: k1 ^= u64(tail[ 0]) << 0; + bmix64(h1,h2,k1,k2,c1,c2); + }; + + //---------- + // finalization + + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + + return h1; +} + + // CRC32 hash using the SSE4.2 instruction u64 GetCRC32(const u8 *src, int len, u32 samples) { @@ -133,60 +256,51 @@ u64 GetCRC32(const u8 *src, int len, u32 samples) #endif } -u64 GetHash64(const u8 *src, int len, u32 samples, bool legacy) + +/* NOTE: This hash function is used for custom texture loading/dumping, so + it should not be changed, which would require all custom textures to be + recalculated for their new hash values. If the hashing function is + changed, make sure this one is still used when the legacy parameter is + true. */ +u64 GetHashHiresTexture(const u8 *src, int len, u32 samples) { const u64 m = 0xc6a4a7935bd1e995; u64 h = len * m; - -#if _M_SSE >= 0x402 - if (cpu_info.bSSE4_2 && !legacy) + const int r = 47; + u32 Step = (len / 8); + const u64 *data = (const u64 *)src; + const u64 *end = data + Step; + if(samples == 0) samples = Step; + Step = Step / samples; + if(Step < 1) Step = 1; + while(data < end) { - h = GetCRC32(src, len, samples); + u64 k = data[0]; + data+=Step; + k *= m; + k ^= k >> r; + k *= m; + h ^= k; + h *= m; } - else -#endif - /* NOTE: This hash function is used for custom texture loading/dumping, so - it should not be changed, which would require all custom textures to be - recalculated for their new hash values. If the hashing function is - changed, make sure this one is still used when the legacy parameter is - true. */ + + const u8 * data2 = (const u8*)end; + + switch(len & 7) { - const int r = 47; - u32 Step = (len / 8); - const u64 *data = (const u64 *)src; - const u64 *end = data + Step; - if(samples == 0) samples = Step; - Step = Step / samples; - if(Step < 1) Step = 1; - while(data < end) - { - u64 k = data[0]; - data+=Step; - k *= m; - k ^= k >> r; - k *= m; - h ^= k; - h *= m; - } - - const u8 * data2 = (const u8*)end; - - switch(len & 7) - { - case 7: h ^= u64(data2[6]) << 48; - case 6: h ^= u64(data2[5]) << 40; - case 5: h ^= u64(data2[4]) << 32; - case 4: h ^= u64(data2[3]) << 24; - case 3: h ^= u64(data2[2]) << 16; - case 2: h ^= u64(data2[1]) << 8; - case 1: h ^= u64(data2[0]); - h *= m; - }; - - h ^= h >> r; - h *= m; - h ^= h >> r; - } + case 7: h ^= u64(data2[6]) << 48; + case 6: h ^= u64(data2[5]) << 40; + case 5: h ^= u64(data2[4]) << 32; + case 4: h ^= u64(data2[3]) << 24; + case 3: h ^= u64(data2[2]) << 16; + case 2: h ^= u64(data2[1]) << 8; + case 1: h ^= u64(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; return h; } @@ -215,79 +329,191 @@ u64 GetCRC32(const u8 *src, int len, u32 samples) #endif } -u64 GetHash64(const u8 *src, int len, u32 samples, bool legacy) +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +inline u32 getblock(const u32 * p, int i) { - const u32 m = 0x5bd1e995; - u64 h = 0; -#if _M_SSE >= 0x402 - if (cpu_info.bSSE4_2 && !legacy) + return p[i]; +} + +//---------- +// Finalization mix - force all bits of a hash block to avalanche + +// avalanches all bits to within 0.25% bias + +inline u32 fmix32(u32 h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +inline void bmix32(u32 & h1, u32 & h2, u32 & k1, u32 & k2, u32 & c1, u32 & c2) +{ + k1 *= c1; + k1 = _rotl(k1,11); + k1 *= c2; + h1 ^= k1; + h1 += h2; + + h2 = _rotl(h2,17); + + k2 *= c2; + k2 = _rotl(k2,11); + k2 *= c1; + h2 ^= k2; + h2 += h1; + + h1 = h1*3+0x52dce729; + h2 = h2*3+0x38495ab5; + + c1 = c1*5+0x7b7d159c; + c2 = c2*5+0x6bce6396; +} + +//---------- + +u64 GetMurmurHash3(const u8* src, int len, u32 samples) +{ + const u8 * data = (const u8*)src; + const int nblocks = len / 8; + u32 out[2]; + + u32 h1 = 0x8de1c3ac; + u32 h2 = 0xbab98226; + + u32 c1 = 0x95543787; + u32 c2 = 0x2ad7eb25; + + //---------- + // body + + const u32 * blocks = (const u32 *)(data + nblocks*8); + + for(int i = -nblocks; i; i++) { - h = GetCRC32(src, len, samples); + u32 k1 = getblock(blocks,i*2+0); + u32 k2 = getblock(blocks,i*2+1); + + bmix32(h1,h2,k1,k2,c1,c2); } - else -#endif + + //---------- + // tail + + const u8 * tail = (const u8*)(data + nblocks*8); + + u32 k1 = 0; + u32 k2 = 0; + + switch(len & 7) { - const int r = 24; - - u32 h1 = len; - u32 h2 = 0; + case 7: k2 ^= tail[6] << 16; + case 6: k2 ^= tail[5] << 8; + case 5: k2 ^= tail[4] << 0; + case 4: k1 ^= tail[3] << 24; + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0] << 0; + bmix32(h1,h2,k1,k2,c1,c2); + }; - u32 Step = (len / 4); - const u32 * data = (const u32 *)src; - const u32 * end = data + Step; - const u8 * uEnd = (const u8 *)end; - if(samples == 0) samples = Step; - Step = Step / samples; + //---------- + // finalization - if(Step < 2) Step = 2; + h2 ^= len; - while(data < end) - { - u32 k1 = data[0]; - k1 *= m; - k1 ^= k1 >> r; - k1 *= m; - h1 *= m; - h1 ^= k1; - + h1 += h2; + h2 += h1; - u32 k2 = data[1]; - k2 *= m; - k2 ^= k2 >> r; - k2 *= m; - h2 *= m; - h2 ^= k2; - data+=Step; - } + h1 = fmix32(h1); + h2 = fmix32(h2); - if((len & 7) > 3) - { - u32 k1 = *(end - 1); - k1 *= m; - k1 ^= k1 >> r; - k1 *= m; - h1 *= m; - h1 ^= k1; - len -= 4; - } + h1 += h2; + h2 += h1; - switch(len & 3) - { - case 3: h2 ^= uEnd[2] << 16; - case 2: h2 ^= uEnd[1] << 8; - case 1: h2 ^= uEnd[0]; - h2 *= m; - }; + out[0] = h1; + out[1] = h2; + + return *((u64 *)&out); +} - h1 ^= h2 >> 18; h1 *= m; - h2 ^= h1 >> 22; h2 *= m; - h1 ^= h2 >> 17; h1 *= m; - h2 ^= h1 >> 19; h2 *= m; - - h = h1; - - h = (h << 32) | h2; +/* FIXME: The old 32-bit version of this hash made different hashes than the + 64-bit version. Until someone can make a new version of the 32-bit one that + makes identical hashes, this is just a c/p of the 64-bit one. */ +u64 GetHashHiresTexture(const u8 *src, int len, u32 samples) +{ + const u64 m = 0xc6a4a7935bd1e995; + u64 h = len * m; + const int r = 47; + u32 Step = (len / 8); + const u64 *data = (const u64 *)src; + const u64 *end = data + Step; + if(samples == 0) samples = Step; + Step = Step / samples; + if(Step < 1) Step = 1; + while(data < end) + { + u64 k = data[0]; + data+=Step; + k *= m; + k ^= k >> r; + k *= m; + h ^= k; + h *= m; } + + const u8 * data2 = (const u8*)end; + + switch(len & 7) + { + case 7: h ^= u64(data2[6]) << 48; + case 6: h ^= u64(data2[5]) << 40; + case 5: h ^= u64(data2[4]) << 32; + case 4: h ^= u64(data2[3]) << 24; + case 3: h ^= u64(data2[2]) << 16; + case 2: h ^= u64(data2[1]) << 8; + case 1: h ^= u64(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + return h; } #endif + +u64 GetHash64(const u8 *src, int len, u32 samples) +{ + return ptrHashFunction(src, len, samples); +} + +// sets the hash function used for the texture cache +void SetHash64Function(bool useHiresTextures) +{ + if (useHiresTextures) + { + ptrHashFunction = &GetHashHiresTexture; + } +#if _M_SSE >= 0x402 + else if (cpu_info.bSSE4_2 && !useHiresTextures) // sse crc32 version + { + ptrHashFunction = &GetCRC32; + } +#endif + else + { + ptrHashFunction = &GetMurmurHash3; + } +} + + + diff --git a/Source/Core/Common/Src/Hash.h b/Source/Core/Common/Src/Hash.h index fb5a3b1127..0e385d4270 100644 --- a/Source/Core/Common/Src/Hash.h +++ b/Source/Core/Common/Src/Hash.h @@ -25,5 +25,8 @@ u32 HashAdler32(const u8* data, size_t len); // Fairly accurate, slightl u32 HashFNV(const u8* ptr, int length); // Another fast and decent hash u32 HashEctor(const u8* ptr, int length); // JUNK. DO NOT USE FOR NEW THINGS u64 GetCRC32(const u8 *src, int len, u32 samples); // SSE4.2 version of CRC32 -u64 GetHash64(const u8 *src, int len, u32 samples, bool legacy = false); +u64 GetHashHiresTexture(const u8 *src, int len, u32 samples); +u64 GetMurmurHash3(const u8 *src, int len, u32 samples); +u64 GetHash64(const u8 *src, int len, u32 samples); +void SetHash64Function(bool useHiresTextures); #endif // _HASH_H_ diff --git a/Source/Core/VideoCommon/Src/HiresTextures.cpp b/Source/Core/VideoCommon/Src/HiresTextures.cpp index 1fcd996427..694150d1ac 100644 --- a/Source/Core/VideoCommon/Src/HiresTextures.cpp +++ b/Source/Core/VideoCommon/Src/HiresTextures.cpp @@ -33,6 +33,8 @@ std::map textureMap; void Init(const char *gameCode) { + textureMap.clear(); + CFileSearch::XStringVector Directories; //Directories.push_back(std::string(File::GetUserPath(D_HIRESTEXTURES_IDX))); char szDir[MAX_PATH]; @@ -88,11 +90,6 @@ void Init(const char *gameCode) } } -void Shutdown() -{ - textureMap.clear(); -} - PC_TexFormat GetHiresTex(const char *fileName, unsigned int *pWidth, unsigned int *pHeight, int texformat, u8 *data) { std::string key(fileName); diff --git a/Source/Core/VideoCommon/Src/HiresTextures.h b/Source/Core/VideoCommon/Src/HiresTextures.h index dc4434e5d4..409c2cc260 100644 --- a/Source/Core/VideoCommon/Src/HiresTextures.h +++ b/Source/Core/VideoCommon/Src/HiresTextures.h @@ -25,7 +25,6 @@ namespace HiresTextures { void Init(const char *gameCode); -void Shutdown(); PC_TexFormat GetHiresTex(const char *fileName, unsigned int *pWidth, unsigned int *pHeight, int texformat, u8 *data); }; diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp index 3989cc57b6..e1bd47077b 100644 --- a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp @@ -23,8 +23,10 @@ enum TextureCache *g_texture_cache; -u8 *TextureCache::temp; +u8 *TextureCache::temp = NULL; + TextureCache::TexCache TextureCache::textures; +Common::CriticalSection TextureCache::texMutex; TextureCache::TCacheEntryBase::~TCacheEntryBase() { @@ -41,13 +43,17 @@ TextureCache::TCacheEntryBase::~TCacheEntryBase() TextureCache::TextureCache() { - temp = (u8*)AllocateMemoryPages(TEMP_SIZE); + if (!temp) + temp = (u8*)AllocateMemoryPages(TEMP_SIZE); TexDecoder_SetTexFmtOverlayOptions(g_ActiveConfig.bTexFmtOverlayEnable, g_ActiveConfig.bTexFmtOverlayCenter); - HiresTextures::Init(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); + if(g_ActiveConfig.bHiresTextures && !g_ActiveConfig.bDumpTextures) + HiresTextures::Init(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); + SetHash64Function(g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures); } void TextureCache::Invalidate(bool shutdown) { + texMutex.Enter(); TexCache::iterator iter = textures.begin(), tcend = textures.end(); @@ -59,14 +65,20 @@ void TextureCache::Invalidate(bool shutdown) } textures.clear(); - HiresTextures::Shutdown(); + if(g_ActiveConfig.bHiresTextures && !g_ActiveConfig.bDumpTextures) + HiresTextures::Init(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); + SetHash64Function(g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures); + texMutex.Leave(); } TextureCache::~TextureCache() { Invalidate(true); - FreeMemoryPages(temp, TEMP_SIZE); - temp = NULL; + if (temp) + { + FreeMemoryPages(temp, TEMP_SIZE); + temp = NULL; + } } void TextureCache::Cleanup() @@ -180,22 +192,20 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage, const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat); const u32 palette_size = TexDecoder_GetPaletteSize(texformat); bool texture_is_dynamic = false; - bool forceLegacyHash = (g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures); unsigned int texLevels; PC_TexFormat pcfmt = PC_TEX_FMT_NONE; - // someone who understands this var could rename it :p - const bool isC4_C8_C14X2 = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2); + const bool isPaletteTexture = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2); - if (isC4_C8_C14X2) + if (isPaletteTexture) full_format = texformat | (tlutfmt << 16); // hires texture loading and texture dumping require accurate hashes - if (g_ActiveConfig.bSafeTextureCache || forceLegacyHash) + if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures) { - texHash = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples, forceLegacyHash); + texHash = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); - if (isC4_C8_C14X2) + if (isPaletteTexture) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) // tlut size can be up to 32768B (GX_TF_C14X2) but Safer == Slower. @@ -206,7 +216,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage, // we must make sure that texture with different tluts get different IDs. const u64 tlutHash = GetHash64(texMem + tlutaddr, palette_size, - g_ActiveConfig.iSafeTextureCache_ColorSamples, forceLegacyHash); + g_ActiveConfig.iSafeTextureCache_ColorSamples); texHash ^= tlutHash; @@ -229,7 +239,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage, { hash_value = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); - if (isC4_C8_C14X2) + if (isPaletteTexture) { hash_value ^= GetHash64(&texMem[tlutaddr], palette_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.h b/Source/Core/VideoCommon/Src/TextureCacheBase.h index 201bd1dc65..9561e3b1e4 100644 --- a/Source/Core/VideoCommon/Src/TextureCacheBase.h +++ b/Source/Core/VideoCommon/Src/TextureCacheBase.h @@ -7,6 +7,7 @@ #include "VideoCommon.h" #include "TextureDecoder.h" #include "BPMemory.h" +#include "Thread.h" #include "CommonTypes.h" @@ -71,6 +72,8 @@ public: virtual ~TextureCache(); // needs virtual for DX11 dtor + static void Init(); + static void Shutdown(); static void Cleanup(); static void Invalidate(bool shutdown); @@ -88,6 +91,8 @@ public: static void CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, bool bScaleByHalf, const EFBRectangle &source_rect); + static Common::CriticalSection texMutex; + protected: TextureCache(); diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index 84c53b3ebf..7b07430c8f 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -158,7 +158,9 @@ void VertexManager::AddVertices(int primitive, int numVertices) void VertexManager::Flush() { + TextureCache::texMutex.Enter(); g_vertex_manager->vFlush(); + TextureCache::texMutex.Leave(); } // TODO: need to merge more stuff into VideoCommon to use this diff --git a/Source/Core/VideoUICommon/Src/VideoConfigDiag.cpp b/Source/Core/VideoUICommon/Src/VideoConfigDiag.cpp index 1cad9f4c02..275e88e77e 100644 --- a/Source/Core/VideoUICommon/Src/VideoConfigDiag.cpp +++ b/Source/Core/VideoUICommon/Src/VideoConfigDiag.cpp @@ -2,6 +2,7 @@ #include "VideoConfigDiag.h" #include "FileUtil.h" +#include "TextureCacheBase.h" #include @@ -58,6 +59,8 @@ void VideoConfigDiag::Event_Close(wxCloseEvent& ev) g_Config.Save((File::GetUserPath(D_CONFIG_IDX) + ininame + ".ini").c_str()); ev.Skip(); + + TextureCache::Invalidate(false); // For settings like hi-res textures/texture format/etc. }