diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index 88556cbee8..c975b12240 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -497,18 +497,39 @@ void BPWritten(const BPCmd& bp) // if this is different from 0, manual TMEM management is used (GX_PreloadEntireTexture). if (bp.newvalue != 0) { - // NOTE(neobrain): Apparently tmemodd doesn't affect hardware behavior at all (libogc uses it just as a buffer and switches its contents with tmemeven whenever this is called) + // TODO: Not quite sure if this is completely correct (likely not) + // NOTE: libogc's implementation of GX_PreloadEntireTexture seems flawed, so it's not necessarily a good reference for RE'ing this feature. + BPS_TmemConfig& tmem_cfg = bpmem.tmem_config; - u8* ram_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5); - u32 tmem_addr = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE; - u32 size = tmem_cfg.preload_tile_info.count * 32; + u8* src_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5); // TODO: Should we add mask here on GC? + u32 size = tmem_cfg.preload_tile_info.count * TMEM_LINE_SIZE; + u32 tmem_addr_even = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE; - // Check if the game has overflowed TMEM, and copy up to the limit. - // Paper Mario does this when entering the Great Boogly Tree (Chap 2) - if ((tmem_addr + size) > TMEM_SIZE) - size = TMEM_SIZE - tmem_addr; + if (tmem_cfg.preload_tile_info.type != 3) + { + if (tmem_addr_even + size > TMEM_SIZE) + size = TMEM_SIZE - tmem_addr_even; - memcpy(texMem + tmem_addr, ram_ptr, size); + memcpy(texMem + tmem_addr_even, src_ptr, size); + } + else // RGBA8 tiles (and CI14, but that might just be stupid libogc!) + { + // AR and GB tiles are stored in separate TMEM banks => can't use a single memcpy for everything + u32 tmem_addr_odd = tmem_cfg.preload_tmem_odd * TMEM_LINE_SIZE; + + for (int i = 0; i < tmem_cfg.preload_tile_info.count; ++i) + { + if (tmem_addr_even + TMEM_LINE_SIZE > TMEM_SIZE || + tmem_addr_even + TMEM_LINE_SIZE > TMEM_SIZE) + break; + + memcpy(texMem + tmem_addr_even, src_ptr, TMEM_LINE_SIZE); + memcpy(texMem + tmem_addr_odd, src_ptr + TMEM_LINE_SIZE, TMEM_LINE_SIZE); + tmem_addr_even += TMEM_LINE_SIZE; + tmem_addr_odd += TMEM_LINE_SIZE; + src_ptr += TMEM_LINE_SIZE * 2; + } + } } break; diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp index cfbd4eb6c8..e0fc11fe1b 100644 --- a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp @@ -333,6 +333,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage, if (from_tmem) src_data = &texMem[bpmem.tex[stage/4].texImage1[stage%4].tmem_even * TMEM_LINE_SIZE]; else src_data = Memory::GetPointer(address); + // TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data from the low tmem bank than it should) tex_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); if (isPaletteTexture) { @@ -413,10 +414,19 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage, } } - // TODO: RGBA8 textures are stored non-continuously in tmem, that might cause problems here when preloading is enabled if (!using_custom_texture) - pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, - expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures); + { + if (!(texformat == GX_TF_RGBA8 && from_tmem)) + { + pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, + expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures); + } + else + { + u8* src_data_gb = &texMem[bpmem.tex[stage/4].texImage2[stage%4].tmem_odd * TMEM_LINE_SIZE]; + pcfmt = TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight); + } + } // TODO: Cleanup. Plus, we still autogenerate mipmaps in certain cases (we shouldn't do that) bool isPow2; diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index 362a42e015..44efcf8bae 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -2519,7 +2519,7 @@ void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* sr { u16 sBlk = s >> 2; u16 tBlk = t >> 2; - u16 widthBlks = (imageWidth >> 2) + 1; + u16 widthBlks = (imageWidth >> 2) + 1; // TODO: Looks wrong. Shouldn't this be ((imageWidth-1)>>2)+1 ? u32 base_ar = (tBlk * widthBlks + sBlk) << 4; u32 base_gb = (tBlk * widthBlks + sBlk) << 4; u16 blkS = s & 3; @@ -2537,6 +2537,18 @@ void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* sr dst[2] = val_addr_gb[1]; // B } +PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height) +{ + // TODO for someone who cares: Make this less slow! + for (int y = 0; y < height; ++y) + for (int x = 0; x < width; ++x) + { + TexDecoder_DecodeTexelRGBA8FromTmem(dst, src_ar, src_gb, x, y, width-1); + dst += 4; + } + + return PC_TEX_FMT_RGBA32; +} const char* texfmt[] = { // pixel diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.h b/Source/Core/VideoCommon/Src/TextureDecoder.h index dd4b17eea3..fae70897f5 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.h +++ b/Source/Core/VideoCommon/Src/TextureDecoder.h @@ -87,6 +87,7 @@ PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, in PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt); void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt); void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth); +PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height); void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center); #endif