Add support for hybrid XFB

This commit is contained in:
iwubcode
2017-05-29 17:02:09 -05:00
parent 84ca9a4aec
commit 79387dddb2
45 changed files with 400 additions and 985 deletions

View File

@ -238,7 +238,8 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma
if (!decoded_entry)
return nullptr;
decoded_entry->SetGeneralParameters(entry->addr, entry->size_in_bytes, entry->format);
decoded_entry->SetGeneralParameters(entry->addr, entry->size_in_bytes, entry->format,
entry->should_force_safe_hashing);
decoded_entry->SetDimensions(entry->native_width, entry->native_height, 1);
decoded_entry->SetHashes(entry->base_hash, entry->hash);
decoded_entry->frameCount = FRAMECOUNT_INVALID;
@ -462,20 +463,6 @@ static u32 CalculateLevelSize(u32 level_0_size, u32 level)
return std::max(level_0_size >> level, 1u);
}
// Used by TextureCacheBase::Load
TextureCacheBase::TCacheEntry* TextureCacheBase::ReturnEntry(unsigned int stage, TCacheEntry* entry)
{
entry->frameCount = FRAMECOUNT_INVALID;
bound_textures[stage] = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true);
// We need to keep track of invalided textures until they have actually been replaced or re-loaded
valid_bind_points.set(stage);
return entry;
}
void TextureCacheBase::BindTextures()
{
for (size_t i = 0; i < bound_textures.size(); ++i)
@ -625,7 +612,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// if this stage was not invalidated by changes to texture registers, keep the current texture
if (IsValidBindPoint(stage) && bound_textures[stage])
{
return ReturnEntry(stage, bound_textures[stage]);
return bound_textures[stage];
}
const FourTexUnits& tex = bpmem.tex[stage >> 2];
@ -639,7 +626,35 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
const bool use_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0[id]);
u32 tex_levels = use_mipmaps ? ((tex.texMode1[id].max_lod + 0xf) / 0x10 + 1) : 1;
const bool from_tmem = tex.texImage1[id].image_type != 0;
const u32 tmem_address_even = from_tmem ? tex.texImage1[id].tmem_even * TMEM_LINE_SIZE : 0;
const u32 tmem_address_odd = from_tmem ? tex.texImage2[id].tmem_odd * TMEM_LINE_SIZE : 0;
auto entry = GetTexture(address, width, height, texformat,
g_ActiveConfig.iSafeTextureCache_ColorSamples, tlutaddr, tlutfmt,
use_mipmaps, tex_levels, from_tmem, tmem_address_even,
tmem_address_odd);
if (!entry)
return nullptr;
entry->frameCount = FRAMECOUNT_INVALID;
bound_textures[stage] = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true);
// We need to keep track of invalided textures until they have actually been replaced or re-loaded
valid_bind_points.set(stage);
return entry;
}
TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 width, u32 height,
const TextureFormat texformat,
const int textureCacheSafetyColorSampleSize, u32 tlutaddr,
TLUTFormat tlutfmt, bool use_mipmaps,
u32 tex_levels, bool from_tmem, u32 tmem_address_even,
u32 tmem_address_odd)
{
// TexelSizeInNibbles(format) * width * height / 16;
const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat);
const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat);
@ -683,9 +698,12 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
}
// TODO: the texture cache lookup is based on address, but a texture from tmem has no reason
// to have a unique and valid address. This could result in a regular texture and a tmem
// texture aliasing onto the same texture cache entry.
const u8* src_data;
if (from_tmem)
src_data = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE];
src_data = &texMem[tmem_address_even];
else
src_data = Memory::GetPointer(address);
@ -704,13 +722,13 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data
// from the low tmem bank than it should)
base_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
base_hash = GetHash64(src_data, texture_size, textureCacheSafetyColorSampleSize);
u32 palette_size = 0;
if (isPaletteTexture)
{
palette_size = TexDecoder_GetPaletteSize(texformat);
full_hash = base_hash ^ GetHash64(&texMem[tlutaddr], palette_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples);
textureCacheSafetyColorSampleSize);
}
else
{
@ -789,7 +807,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// texture formats. I'm not sure what effect checking width/height/levels
// would have.
if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion)
return ReturnEntry(stage, entry);
return entry;
// Note that we found an unconverted EFB copy, then continue. We'll
// perform the conversion later. Currently, we only convert EFB copies to
@ -816,7 +834,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
{
entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
return ReturnEntry(stage, entry);
return entry;
}
}
@ -841,7 +859,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
if (decoded_entry)
{
return ReturnEntry(stage, decoded_entry);
return decoded_entry;
}
}
@ -851,9 +869,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// textures cause unnecessary slowdowns
// Example: Tales of Symphonia (GC) uses over 500 small textures in menus, but only around 70
// different ones
if (g_ActiveConfig.iSafeTextureCache_ColorSamples == 0 ||
if (textureCacheSafetyColorSampleSize == 0 ||
std::max(texture_size, palette_size) <=
(u32)g_ActiveConfig.iSafeTextureCache_ColorSamples * 8)
(u32)textureCacheSafetyColorSampleSize * 8)
{
auto hash_range = textures_by_hash.equal_range(full_hash);
TexHashCache::iterator hash_iter = hash_range.first;
@ -866,7 +884,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
{
entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt);
return ReturnEntry(stage, entry);
return entry;
}
++hash_iter;
}
@ -936,64 +954,66 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// Initialized to null because only software loading uses this buffer
u8* dst_buffer = nullptr;
if (!hires_tex && decode_on_gpu)
if (!hires_tex)
{
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height,
expandedWidth, expandedHeight, row_stride, tlut, tlutfmt);
}
else if (!hires_tex)
{
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
// Allocate memory for all levels at once
size_t total_texture_size = decoded_texture_size;
// For the downsample, we need 2 buffers; 1 is 1/4 of the original texture, the other 1/16
size_t mip_downsample_buffer_size = decoded_texture_size * 5 / 16;
size_t prev_level_size = decoded_texture_size;
for (u32 i = 1; i < tex_levels; ++i)
if (decode_on_gpu)
{
prev_level_size /= 4;
total_texture_size += prev_level_size;
}
// Add space for the downsampling at the end
total_texture_size += mip_downsample_buffer_size;
CheckTempSize(total_texture_size);
dst_buffer = temp;
if (!(texformat == TextureFormat::RGBA8 && from_tmem))
{
TexDecoder_Decode(dst_buffer, src_data, expandedWidth, expandedHeight, texformat, tlut,
tlutfmt);
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(
entry, 0, src_data, texture_size, texformat, width, height,
expandedWidth, expandedHeight, row_stride, tlut, tlutfmt);
}
else
{
u8* src_data_gb =
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
TexDecoder_DecodeRGBA8FromTmem(dst_buffer, src_data, src_data_gb, expandedWidth,
expandedHeight);
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
// Allocate memory for all levels at once
size_t total_texture_size = decoded_texture_size;
// For the downsample, we need 2 buffers; 1 is 1/4 of the original texture, the other 1/16
size_t mip_downsample_buffer_size = decoded_texture_size * 5 / 16;
size_t prev_level_size = decoded_texture_size;
for (u32 i = 1; i < tex_levels; ++i)
{
prev_level_size /= 4;
total_texture_size += prev_level_size;
}
// Add space for the downsampling at the end
total_texture_size += mip_downsample_buffer_size;
CheckTempSize(total_texture_size);
dst_buffer = temp;
if (!(texformat == TextureFormat::RGBA8 && from_tmem))
{
TexDecoder_Decode(dst_buffer, src_data, expandedWidth, expandedHeight, texformat, tlut,
tlutfmt);
}
else
{
u8* src_data_gb =
&texMem[tmem_address_odd];
TexDecoder_DecodeRGBA8FromTmem(dst_buffer, src_data, src_data_gb, expandedWidth, expandedHeight);
}
entry->texture->Load(0, width, height, expandedWidth, dst_buffer, decoded_texture_size);
arbitrary_mip_detector.AddLevel(width, height, expandedWidth, dst_buffer);
dst_buffer += decoded_texture_size;
}
entry->texture->Load(0, width, height, expandedWidth, dst_buffer, decoded_texture_size);
arbitrary_mip_detector.AddLevel(width, height, expandedWidth, dst_buffer);
dst_buffer += decoded_texture_size;
}
iter = textures_by_address.emplace(address, entry);
if (g_ActiveConfig.iSafeTextureCache_ColorSamples == 0 ||
if (textureCacheSafetyColorSampleSize == 0 ||
std::max(texture_size, palette_size) <=
(u32)g_ActiveConfig.iSafeTextureCache_ColorSamples * 8)
(u32)textureCacheSafetyColorSampleSize * 8)
{
entry->textures_by_hash_iter = textures_by_hash.emplace(full_hash, entry);
}
entry->SetGeneralParameters(address, texture_size, full_format);
entry->SetGeneralParameters(address, texture_size, full_format, false);
entry->SetDimensions(nativeW, nativeH, tex_levels);
entry->SetHashes(base_hash, full_hash);
entry->is_efb_copy = false;
@ -1025,9 +1045,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
const u8* ptr_odd = nullptr;
if (from_tmem)
{
ptr_even = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE +
texture_size];
ptr_odd = &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
ptr_even = &texMem[tmem_address_even + texture_size];
ptr_odd = &texMem[tmem_address_odd];
}
for (u32 level = 1; level != texLevels; ++level)
@ -1081,7 +1100,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
return ReturnEntry(stage, entry);
return entry;
}
void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat,
@ -1159,6 +1178,10 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
unsigned int cbufid = UINT_MAX;
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
bool efbHasAlpha = srcFormat == PEControl::RGBA6_Z24;
bool copy_to_ram = !g_ActiveConfig.bSkipEFBCopyToRam;
bool copy_to_vram = g_ActiveConfig.backend_info.bSupportsCopyToVram;
bool is_xfb_copy = false;
if (is_depth_copy)
{
@ -1388,6 +1411,15 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
}
break;
case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
ColorMask[3] = 0.0f;
fConstAdd[3] = 1.0f;
cbufid = 30; // just re-use the RGBX8 cbufid from above
copy_to_ram = g_ActiveConfig.bUseRealXFB;
is_xfb_copy = true;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(dstFormat));
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
@ -1430,9 +1462,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
const u32 bytes_per_row = num_blocks_x * bytes_per_block;
const u32 covered_range = num_blocks_y * dstStride;
bool copy_to_ram = !g_ActiveConfig.bSkipEFBCopyToRam;
bool copy_to_vram = true;
if (copy_to_ram)
{
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
@ -1524,7 +1553,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
if (entry)
{
entry->SetGeneralParameters(dstAddr, 0, baseFormat);
entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy);
entry->SetDimensions(tex_w, tex_h, 1);
entry->frameCount = FRAMECOUNT_INVALID;
@ -1537,7 +1566,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);
if (g_ActiveConfig.bDumpEFBTarget)
if (g_ActiveConfig.bDumpEFBTarget && !is_xfb_copy)
{
static int count = 0;
entry->texture->Save(StringFromFormat("%sefb_frame_%i.png",
@ -1699,12 +1728,22 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride)
size_in_bytes = memory_stride * NumBlocksY();
}
int TextureCacheBase::TCacheEntry::HashSampleSize() const
{
if (should_force_safe_hashing)
{
return 0;
}
return g_ActiveConfig.iSafeTextureCache_ColorSamples;
}
u64 TextureCacheBase::TCacheEntry::CalculateHash() const
{
u8* ptr = Memory::GetPointer(addr);
if (memory_stride == BytesPerRow())
{
return GetHash64(ptr, size_in_bytes, g_ActiveConfig.iSafeTextureCache_ColorSamples);
return GetHash64(ptr, size_in_bytes, HashSampleSize());
}
else
{
@ -1712,11 +1751,11 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const
u64 temp_hash = size_in_bytes;
u32 samples_per_row = 0;
if (g_ActiveConfig.iSafeTextureCache_ColorSamples != 0)
if (HashSampleSize() != 0)
{
// Hash at least 4 samples per row to avoid hashing in a bad pattern, like just on the left
// side of the efb copy
samples_per_row = std::max(g_ActiveConfig.iSafeTextureCache_ColorSamples / blocks, 4u);
samples_per_row = std::max(HashSampleSize() / blocks, 4u);
}
for (u32 i = 0; i < blocks; i++)