2015-05-23 22:55:12 -06:00
// Copyright 2010 Dolphin Emulator Project
2015-05-17 17:08:10 -06:00
// Licensed under GPLv2+
2013-04-17 21:09:55 -06:00
// Refer to the license.txt file included.
2010-10-19 16:24:27 -06:00
2014-05-02 20:47:04 -06:00
# include <algorithm>
2014-06-02 23:08:54 -06:00
# include <string>
2014-05-02 20:47:04 -06:00
2014-02-17 03:18:15 -07:00
# include "Common/FileUtil.h"
# include "Common/MemoryUtil.h"
2014-06-02 23:08:54 -06:00
# include "Common/StringUtil.h"
2010-10-19 16:24:27 -06:00
2014-02-17 03:18:15 -07:00
# include "Core/ConfigManager.h"
2015-09-06 08:31:32 -06:00
# include "Core/FifoPlayer/FifoPlayer.h"
2015-09-07 09:05:47 -06:00
# include "Core/FifoPlayer/FifoRecorder.h"
2014-02-17 03:18:15 -07:00
# include "Core/HW/Memmap.h"
2010-10-19 16:24:27 -06:00
2014-02-17 03:18:15 -07:00
# include "VideoCommon/Debugger.h"
2015-01-11 07:03:41 -07:00
# include "VideoCommon/FramebufferManagerBase.h"
2014-02-17 03:18:15 -07:00
# include "VideoCommon/HiresTextures.h"
# include "VideoCommon/RenderBase.h"
# include "VideoCommon/Statistics.h"
# include "VideoCommon/TextureCacheBase.h"
2015-09-07 09:05:47 -06:00
# include "VideoCommon/VideoCommon.h"
2014-02-17 03:18:15 -07:00
# include "VideoCommon/VideoConfig.h"
2010-10-19 16:24:27 -06:00
2014-05-19 10:27:59 -06:00
// Hash value used to initialise hashes before a real one has been computed.
static const u64 TEXHASH_INVALID = 0;

// A cached texture is dropped by Cleanup() once it has gone unused for more than this many frames.
// Sonic the Fighters (inside Sonic Gems Collection) loops a 64 frames animation
static const int TEXTURE_KILL_THRESHOLD = 64;

// Same idea as TEXTURE_KILL_THRESHOLD, but for entries sitting in the texture pool.
static const int TEXTURE_POOL_KILL_THRESHOLD = 3;

// Marker stored in an entry's frameCount until Cleanup() stamps it with a real frame number.
static const int FRAMECOUNT_INVALID = 0;
2010-10-19 16:24:27 -06:00
2015-09-05 09:44:21 -06:00
TextureCache * g_texture_cache ;
2010-10-19 16:24:27 -06:00
2015-09-05 09:44:21 -06:00
alignas ( 16 ) u8 * TextureCache : : temp = nullptr ;
2014-12-22 04:53:03 -07:00
size_t TextureCache : : temp_size ;
2011-02-05 03:08:06 -07:00
2015-02-22 10:42:19 -07:00
TextureCache : : TexCache TextureCache : : textures_by_address ;
TextureCache : : TexCache TextureCache : : textures_by_hash ;
2015-01-17 02:57:19 -07:00
TextureCache : : TexPool TextureCache : : texture_pool ;
2015-01-26 16:33:23 -07:00
TextureCache : : TCacheEntryBase * TextureCache : : bound_textures [ 8 ] ;
2012-05-28 03:31:37 -06:00
TextureCache : : BackupConfig TextureCache : : backup_config ;
2014-07-08 07:58:25 -06:00
static bool invalidate_texture_cache_requested ;
2010-10-19 16:24:27 -06:00
// Out-of-line destructor with no work of its own.
TextureCache::TCacheEntryBase::~TCacheEntryBase() = default;
2014-12-22 04:53:03 -07:00
void TextureCache : : CheckTempSize ( size_t required_size )
{
if ( required_size < = temp_size )
return ;
temp_size = required_size ;
FreeAlignedMemory ( temp ) ;
temp = ( u8 * ) AllocateAlignedMemory ( temp_size , 16 ) ;
}
2010-10-19 16:24:27 -06:00
// Sets up the shared scratch buffer, the texture-format overlay options, hi-res texture
// support and the hash function, then clears any pending invalidation request.
TextureCache::TextureCache()
{
	// Room for one 2048x2048 image at 4 bytes per pixel.
	temp_size = 2048 * 2048 * 4;
	if (temp == nullptr)
	{
		temp = (u8*)AllocateAlignedMemory(temp_size, 16);
	}

	TexDecoder_SetTexFmtOverlayOptions(g_ActiveConfig.bTexFmtOverlayEnable,
	                                   g_ActiveConfig.bTexFmtOverlayCenter);

	HiresTexture::Init();

	SetHash64Function();

	// Reset last, so a request raised during the initialisation above is discarded as well.
	invalidate_texture_cache_requested = false;
}
void TextureCache : : RequestInvalidateTextureCache ( )
{
invalidate_texture_cache_requested = true ;
2010-10-19 16:24:27 -06:00
}
2012-05-28 03:37:14 -06:00
void TextureCache : : Invalidate ( )
2010-10-19 16:24:27 -06:00
{
2015-01-26 16:33:23 -07:00
UnbindTextures ( ) ;
2015-02-22 10:42:19 -07:00
for ( auto & tex : textures_by_address )
2014-03-02 22:25:15 -07:00
{
delete tex . second ;
}
2015-02-22 10:42:19 -07:00
textures_by_address . clear ( ) ;
textures_by_hash . clear ( ) ;
2014-06-11 17:04:42 -06:00
2015-01-17 02:29:10 -07:00
for ( auto & rt : texture_pool )
2014-06-11 17:04:42 -06:00
{
2015-01-17 02:57:19 -07:00
delete rt . second ;
2014-06-11 17:04:42 -06:00
}
2015-01-17 02:29:10 -07:00
texture_pool . clear ( ) ;
2010-10-19 16:24:27 -06:00
}
// Shuts down hi-res texture support, frees every cached texture and releases the scratch buffer.
TextureCache::~TextureCache()
{
	HiresTexture::Shutdown();

	Invalidate();

	// Null the pointer after freeing so that the next constructor run allocates a fresh buffer.
	FreeAlignedMemory(temp);
	temp = nullptr;
}
2012-05-28 03:31:37 -06:00
void TextureCache : : OnConfigChanged ( VideoConfig & config )
{
2013-02-15 19:46:03 -07:00
if ( g_texture_cache )
2012-05-28 03:37:14 -06:00
{
2015-03-01 15:53:15 -07:00
if ( config . bHiresTextures ! = backup_config . s_hires_textures | |
config . bCacheHiresTextures ! = backup_config . s_cache_hires_textures )
{
HiresTexture : : Update ( ) ;
}
2013-02-15 19:46:03 -07:00
// TODO: Invalidating texcache is really stupid in some of these cases
if ( config . iSafeTextureCache_ColorSamples ! = backup_config . s_colorsamples | |
config . bTexFmtOverlayEnable ! = backup_config . s_texfmt_overlay | |
config . bTexFmtOverlayCenter ! = backup_config . s_texfmt_overlay_center | |
2013-06-07 19:28:54 -06:00
config . bHiresTextures ! = backup_config . s_hires_textures | |
invalidate_texture_cache_requested )
2013-02-15 19:46:03 -07:00
{
g_texture_cache - > Invalidate ( ) ;
2012-05-28 03:37:14 -06:00
2013-02-15 19:46:03 -07:00
TexDecoder_SetTexFmtOverlayOptions ( g_ActiveConfig . bTexFmtOverlayEnable , g_ActiveConfig . bTexFmtOverlayCenter ) ;
2013-06-07 19:28:54 -06:00
invalidate_texture_cache_requested = false ;
2013-02-15 19:46:03 -07:00
}
2012-05-28 03:31:37 -06:00
2014-11-08 08:19:15 -07:00
if ( ( config . iStereoMode > 0 ) ! = backup_config . s_stereo_3d | |
2014-12-24 15:06:44 -07:00
config . bStereoEFBMonoDepth ! = backup_config . s_efb_mono_depth )
2014-11-03 16:53:14 -07:00
{
g_texture_cache - > DeleteShaders ( ) ;
g_texture_cache - > CompileShaders ( ) ;
}
2012-05-28 03:31:37 -06:00
}
2013-10-28 23:23:17 -06:00
2012-05-28 03:31:37 -06:00
backup_config . s_colorsamples = config . iSafeTextureCache_ColorSamples ;
backup_config . s_texfmt_overlay = config . bTexFmtOverlayEnable ;
backup_config . s_texfmt_overlay_center = config . bTexFmtOverlayCenter ;
backup_config . s_hires_textures = config . bHiresTextures ;
2015-03-01 15:53:15 -07:00
backup_config . s_cache_hires_textures = config . bCacheHiresTextures ;
2014-11-03 16:53:14 -07:00
backup_config . s_stereo_3d = config . iStereoMode > 0 ;
2014-12-24 15:06:44 -07:00
backup_config . s_efb_mono_depth = config . bStereoEFBMonoDepth ;
2012-05-28 03:31:37 -06:00
}
2014-05-19 10:31:38 -06:00
void TextureCache : : Cleanup ( int _frameCount )
2010-10-19 16:24:27 -06:00
{
2015-02-22 10:42:19 -07:00
TexCache : : iterator iter = textures_by_address . begin ( ) ;
TexCache : : iterator tcend = textures_by_address . end ( ) ;
2010-10-19 16:24:27 -06:00
while ( iter ! = tcend )
{
2015-02-15 12:43:31 -07:00
if ( iter - > second - > frameCount = = FRAMECOUNT_INVALID )
2014-05-19 10:31:38 -06:00
{
iter - > second - > frameCount = _frameCount ;
2015-09-07 12:53:38 -06:00
+ + iter ;
2014-05-19 10:31:38 -06:00
}
2015-09-07 12:53:38 -06:00
else if ( _frameCount > TEXTURE_KILL_THRESHOLD + iter - > second - > frameCount )
2010-10-19 16:24:27 -06:00
{
2015-09-07 12:53:38 -06:00
if ( iter - > second - > IsEfbCopy ( ) )
{
// Only remove EFB copies when they wouldn't be used anymore(changed hash), because EFB copies living on the
// host GPU are unrecoverable. Perform this check only every TEXTURE_KILL_THRESHOLD for performance reasons
if ( ( _frameCount - iter - > second - > frameCount ) % TEXTURE_KILL_THRESHOLD = = 1 & &
2015-09-13 05:30:56 -06:00
iter - > second - > hash ! = iter - > second - > CalculateHash ( ) )
2015-09-07 12:53:38 -06:00
{
iter = FreeTexture ( iter ) ;
}
else
{
+ + iter ;
}
}
else
{
iter = FreeTexture ( iter ) ;
}
2010-10-19 16:24:27 -06:00
}
else
2013-04-24 07:21:54 -06:00
{
2010-10-19 16:24:27 -06:00
+ + iter ;
2013-04-24 07:21:54 -06:00
}
2010-10-19 16:24:27 -06:00
}
2014-06-11 17:04:42 -06:00
2015-01-17 02:57:19 -07:00
TexPool : : iterator iter2 = texture_pool . begin ( ) ;
TexPool : : iterator tcend2 = texture_pool . end ( ) ;
while ( iter2 ! = tcend2 )
2014-06-11 17:04:42 -06:00
{
2015-02-15 12:43:31 -07:00
if ( iter2 - > second - > frameCount = = FRAMECOUNT_INVALID )
2015-01-17 02:57:19 -07:00
{
iter2 - > second - > frameCount = _frameCount ;
}
if ( _frameCount > TEXTURE_POOL_KILL_THRESHOLD + iter2 - > second - > frameCount )
2014-06-11 17:04:42 -06:00
{
2015-01-17 02:57:19 -07:00
delete iter2 - > second ;
iter2 = texture_pool . erase ( iter2 ) ;
2014-06-11 17:04:42 -06:00
}
else
{
2015-01-17 02:57:19 -07:00
+ + iter2 ;
2014-06-11 17:04:42 -06:00
}
}
2010-10-19 16:24:27 -06:00
}
2014-05-16 13:57:14 -06:00
bool TextureCache : : TCacheEntryBase : : OverlapsMemoryRange ( u32 range_address , u32 range_size ) const
2010-10-19 16:24:27 -06:00
{
2015-02-19 16:19:31 -07:00
if ( addr + size_in_bytes < = range_address )
2014-05-16 13:57:14 -06:00
return false ;
2010-10-19 16:24:27 -06:00
2015-02-19 16:19:31 -07:00
if ( addr > = range_address + range_size )
2015-01-26 16:33:23 -07:00
return false ;
2012-03-20 15:36:21 -06:00
2015-01-26 16:33:23 -07:00
return true ;
2010-10-19 16:24:27 -06:00
}
2015-06-29 19:19:19 -06:00
// Copies EFB copies that lie inside the memory range of the texture referenced by iter_t into
// that texture, and returns the entry the caller should use afterwards.
//
// iter_t: iterator into textures_by_address for the texture to update.
// Returns: the entry behind iter_t, or a newly allocated replacement when the texture had to be
// re-created at a different size (the old entry is then freed and the replacement is inserted
// under the same key).
TextureCache : : TCacheEntryBase * TextureCache : : DoPartialTextureUpdates ( TexCache : : iterator iter_t )
2015-06-13 07:51:58 -06:00
{
2015-06-29 19:19:19 -06:00
TCacheEntryBase * entry_to_update = iter_t - > second ;
// NOTE(review): formats >= 0x10000 are presumably texture formats combined with a TLUT format
// in the upper bits (Load builds full_format that way) - confirm.
const bool isPaletteTexture = ( entry_to_update - > format = = GX_TF_C4
| | entry_to_update - > format = = GX_TF_C8
| | entry_to_update - > format = = GX_TF_C14X2
| | entry_to_update - > format > = 0x10000 ) ;
2015-06-13 07:51:58 -06:00
// Efb copies and paletted textures are excluded from these updates, until there's an example where a game would
// benefit from this. Both would require more work to be done.
// TODO: Implement upscaling support for normal textures, and then remove the efb to ram and the scaled efb restrictions
2015-06-29 19:19:19 -06:00
if ( entry_to_update - > IsEfbCopy ( )
| | isPaletteTexture )
return entry_to_update ;
2015-06-13 07:51:58 -06:00
2015-06-29 19:19:19 -06:00
// Block geometry of the target's format. block_size is texels per block times nibbles per
// texel, halved - presumably the size of one block in bytes.
u32 block_width = TexDecoder_GetBlockWidthInTexels ( entry_to_update - > format ) ;
u32 block_height = TexDecoder_GetBlockHeightInTexels ( entry_to_update - > format ) ;
u32 block_size = block_width * block_height * TexDecoder_GetTexelSizeInNibbles ( entry_to_update - > format ) / 2 ;
2015-06-13 07:51:58 -06:00
2015-06-29 19:19:19 -06:00
// Number of blocks in one row of the target, rounded up.
u32 numBlocksX = ( entry_to_update - > native_width + block_width - 1 ) / block_width ;
2015-06-13 07:51:58 -06:00
2015-06-29 19:19:19 -06:00
// Candidates: every cache entry whose key lies in [addr, addr + size_in_bytes] of the target.
TexCache : : iterator iter = textures_by_address . lower_bound ( entry_to_update - > addr ) ;
TexCache : : iterator iterend = textures_by_address . upper_bound ( entry_to_update - > addr + entry_to_update - > size_in_bytes ) ;
// True until the first usable EFB copy has been seen; that copy decides the target's scale.
bool entry_need_scaling = true ;
2015-06-13 07:51:58 -06:00
while ( iter ! = iterend )
{
TCacheEntryBase * entry = iter - > second ;
2015-06-29 19:19:19 -06:00
// Consider only other entries that are EFB copies, lie completely inside the target's memory
// range, have not been applied yet (frameCount is still FRAMECOUNT_INVALID) and whose memory
// stride equals the size of one row of blocks of the target.
if ( entry ! = entry_to_update
& & entry - > IsEfbCopy ( )
& & entry_to_update - > addr < = entry - > addr
& & entry - > addr + entry - > size_in_bytes < = entry_to_update - > addr + entry_to_update - > size_in_bytes
& & entry - > frameCount = = FRAMECOUNT_INVALID
2015-09-04 08:45:29 -06:00
& & entry - > memory_stride = = numBlocksX * block_size )
2015-06-13 07:51:58 -06:00
{
2015-10-18 07:43:55 -06:00
// Apply the copy only if its stored hash still matches a freshly calculated one.
if ( entry - > hash = = entry - > CalculateHash ( ) )
2015-06-29 19:19:19 -06:00
{
2015-10-18 07:43:55 -06:00
// Position of the copy inside the target: byte offset -> block index -> block
// column/row -> texel coordinates.
u32 block_offset = ( entry - > addr - entry_to_update - > addr ) / block_size ;
u32 block_x = block_offset % numBlocksX ;
u32 block_y = block_offset / numBlocksX ;
u32 x = block_x * block_width ;
u32 y = block_y * block_height ;
MathUtil : : Rectangle < int > srcrect , dstrect ;
srcrect . left = 0 ;
srcrect . top = 0 ;
dstrect . left = 0 ;
dstrect . top = 0 ;
if ( entry_need_scaling )
2015-06-29 19:19:19 -06:00
{
2015-10-18 07:43:55 -06:00
entry_need_scaling = false ;
// Size the target would need so that it has the same host-to-native scale as this copy.
u32 w = entry_to_update - > native_width * entry - > config . width / entry - > native_width ;
u32 h = entry_to_update - > native_height * entry - > config . height / entry - > native_height ;
u32 max = g_renderer - > GetMaxTextureSize ( ) ;
// Skip this copy when the scaled target would exceed the renderer's maximum texture size.
// NOTE(review): entry_need_scaling has already been cleared at this point, so later copies
// are applied without another size check - confirm this is intended.
if ( max < w | | max < h )
{
iter + + ;
continue ;
}
if ( entry_to_update - > config . width ! = w | | entry_to_update - > config . height ! = h )
{
// Re-create the target at the new size: allocate a render-target texture, carry over
// address, size, format, dimensions and hashes, and copy the old image into it.
TextureCache : : TCacheEntryConfig newconfig ;
newconfig . width = w ;
newconfig . height = h ;
newconfig . rendertarget = true ;
TCacheEntryBase * newentry = AllocateTexture ( newconfig ) ;
newentry - > SetGeneralParameters ( entry_to_update - > addr , entry_to_update - > size_in_bytes , entry_to_update - > format ) ;
newentry - > SetDimensions ( entry_to_update - > native_width , entry_to_update - > native_height , 1 ) ;
newentry - > SetHashes ( entry_to_update - > base_hash , entry_to_update - > hash ) ;
newentry - > frameCount = frameCount ;
newentry - > is_efb_copy = false ;
srcrect . right = entry_to_update - > config . width ;
srcrect . bottom = entry_to_update - > config . height ;
dstrect . right = w ;
dstrect . bottom = h ;
newentry - > CopyRectangleFromTexture ( entry_to_update , srcrect , dstrect ) ;
entry_to_update = newentry ;
// Swap the cache entry: free the old texture and insert the replacement under the same key.
u64 key = iter_t - > first ;
iter_t = FreeTexture ( iter_t ) ;
textures_by_address . emplace ( key , entry_to_update ) ;
}
2015-06-29 19:19:19 -06:00
}
2015-10-18 07:43:55 -06:00
// Source is the whole copy; destination is the copy's native rectangle converted to the
// target's host-texture coordinates.
srcrect . right = entry - > config . width ;
srcrect . bottom = entry - > config . height ;
dstrect . left = x * entry_to_update - > config . width / entry_to_update - > native_width ;
dstrect . top = y * entry_to_update - > config . height / entry_to_update - > native_height ;
dstrect . right = ( x + entry - > native_width ) * entry_to_update - > config . width / entry_to_update - > native_width ;
dstrect . bottom = ( y + entry - > native_height ) * entry_to_update - > config . height / entry_to_update - > native_height ;
entry_to_update - > CopyRectangleFromTexture ( entry , srcrect , dstrect ) ;
// Mark the texture update as used, so it isn't applied more than once
entry - > frameCount = frameCount ;
}
else
{
// If the hash does not match, this EFB copy will not be used for anything, so remove it
// FreeTexture returns the next iterator, so the increment at the end of the loop is skipped.
iter = FreeTexture ( iter ) ;
continue ;
2015-06-29 19:19:19 -06:00
}
2015-06-13 07:51:58 -06:00
}
+ + iter ;
}
2015-06-29 19:19:19 -06:00
return entry_to_update ;
2015-06-13 07:51:58 -06:00
}
2014-12-22 14:33:38 -07:00
// Saves one mip level of `entry` as a PNG file in the texture dump directory of the current
// game, unless a file of that name already exists.
//
// entry:    texture to save; its Save() method writes the file.
// basename: file name without extension; a mip suffix is appended for levels above 0.
// level:    mip level to save.
//
// NOTE(review): the string literals below contain leading/trailing spaces (" / ", " .png ",
// " _mip%i "). These look like a formatting artefact of this copy of the file - confirm
// against the real source before relying on the exact file names.
void TextureCache : : DumpTexture ( TCacheEntryBase * entry , std : : string basename , unsigned int level )
2012-05-12 05:25:13 -06:00
{
2012-05-12 05:31:09 -06:00
// Dump directory = user dump-texture path followed by the game's unique ID.
std : : string szDir = File : : GetUserPath ( D_DUMPTEXTURES_IDX ) +
2015-06-12 05:56:53 -06:00
SConfig : : GetInstance ( ) . m_strUniqueID ;
2012-05-12 05:31:09 -06:00
// make sure that the directory exists
2014-03-12 13:33:41 -06:00
if ( ! File : : Exists ( szDir ) | | ! File : : IsDirectory ( szDir ) )
File : : CreateDir ( szDir ) ;
2012-05-12 05:31:09 -06:00
2014-12-22 14:33:38 -07:00
// Level 0 keeps the plain base name; higher levels get a mip suffix.
if ( level > 0 )
2012-05-12 05:50:03 -06:00
{
2014-12-22 14:33:38 -07:00
// NOTE(review): `level` is unsigned int but is formatted with %i; %u would match the type.
basename + = StringFromFormat ( " _mip%i " , level ) ;
2012-05-12 05:50:03 -06:00
}
2014-12-22 14:33:38 -07:00
std : : string filename = szDir + " / " + basename + " .png " ;
2012-05-12 05:31:09 -06:00
2013-11-16 15:12:07 -07:00
// Never overwrite an existing dump.
if ( ! File : : Exists ( filename ) )
entry - > Save ( filename , level ) ;
2012-05-12 05:31:09 -06:00
}
2013-02-15 19:46:03 -07:00
// Returns the size (width or height, in texels) of mip level `level` for a texture whose
// level 0 has size `level_0_size`: level_0_size / 2^level, rounded up.
//
// level_0_size: size of the base level.
// level:        mip level, must be below 32.
static u32 CalculateLevelSize(u32 level_0_size, u32 level)
{
	// Build the rounding bias in unsigned 32-bit arithmetic; a plain `1 << level` is a signed
	// shift, which is undefined for level 31.
	const u32 round_up_bias = ((u32)1 << level) - 1;
	return (level_0_size + round_up_bias) >> level;
}
// Used by TextureCache::Load
2015-01-26 16:33:23 -07:00
TextureCache : : TCacheEntryBase * TextureCache : : ReturnEntry ( unsigned int stage , TCacheEntryBase * entry )
2013-02-15 19:46:03 -07:00
{
2014-05-19 10:31:38 -06:00
entry - > frameCount = FRAMECOUNT_INVALID ;
2015-01-26 16:33:23 -07:00
bound_textures [ stage ] = entry ;
2013-02-15 19:46:03 -07:00
GFX_DEBUGGER_PAUSE_AT ( NEXT_TEXTURE_CHANGE , true ) ;
return entry ;
}
2015-01-26 16:33:23 -07:00
void TextureCache : : BindTextures ( )
{
for ( int i = 0 ; i < 8 ; + + i )
{
if ( bound_textures [ i ] )
bound_textures [ i ] - > Bind ( i ) ;
}
}
void TextureCache : : UnbindTextures ( )
{
std : : fill ( std : : begin ( bound_textures ) , std : : end ( bound_textures ) , nullptr ) ;
}
2015-01-11 04:48:04 -07:00
TextureCache : : TCacheEntryBase * TextureCache : : Load ( const u32 stage )
2010-10-19 16:24:27 -06:00
{
2015-01-11 04:48:04 -07:00
const FourTexUnits & tex = bpmem . tex [ stage > > 2 ] ;
const u32 id = stage & 3 ;
const u32 address = ( tex . texImage3 [ id ] . image_base /* & 0x1FFFFF*/ ) < < 5 ;
u32 width = tex . texImage0 [ id ] . width + 1 ;
u32 height = tex . texImage0 [ id ] . height + 1 ;
const int texformat = tex . texImage0 [ id ] . format ;
const u32 tlutaddr = tex . texTlut [ id ] . tmem_offset < < 9 ;
const u32 tlutfmt = tex . texTlut [ id ] . tlut_format ;
2015-03-01 05:04:48 -07:00
const bool use_mipmaps = ( tex . texMode0 [ id ] . min_filter & 3 ) ! = 0 ;
u32 tex_levels = use_mipmaps ? ( ( tex . texMode1 [ id ] . max_lod + 0xf ) / 0x10 + 1 ) : 1 ;
2015-01-11 04:48:04 -07:00
const bool from_tmem = tex . texImage1 [ id ] . image_type ! = 0 ;
2010-10-19 16:24:27 -06:00
if ( 0 = = address )
2014-03-09 14:14:26 -06:00
return nullptr ;
2010-10-19 16:24:27 -06:00
2013-02-15 19:46:03 -07:00
// TexelSizeInNibbles(format) * width * height / 16;
2015-09-06 04:07:32 -06:00
const unsigned int bsw = TexDecoder_GetBlockWidthInTexels ( texformat ) ;
const unsigned int bsh = TexDecoder_GetBlockHeightInTexels ( texformat ) ;
2010-10-19 16:24:27 -06:00
2015-09-06 04:07:32 -06:00
unsigned int expandedWidth = ROUND_UP ( width , bsw ) ;
unsigned int expandedHeight = ROUND_UP ( height , bsh ) ;
2010-10-19 18:12:41 -06:00
const unsigned int nativeW = width ;
const unsigned int nativeH = height ;
2010-10-19 16:24:27 -06:00
2013-02-15 19:46:03 -07:00
// Hash assigned to texcache entry (also used to generate filenames used for texture dumping and custom texture lookup)
2015-09-10 14:28:59 -06:00
u64 base_hash = TEXHASH_INVALID ;
2015-02-22 10:42:19 -07:00
u64 full_hash = TEXHASH_INVALID ;
2011-12-26 13:37:18 -07:00
2010-10-19 16:24:27 -06:00
u32 full_format = texformat ;
2011-02-05 03:08:06 -07:00
const bool isPaletteTexture = ( texformat = = GX_TF_C4 | | texformat = = GX_TF_C8 | | texformat = = GX_TF_C14X2 ) ;
2015-01-26 16:33:23 -07:00
// Reject invalid tlut format.
if ( isPaletteTexture & & tlutfmt > GX_TL_RGB5A3 )
return nullptr ;
2011-02-05 03:08:06 -07:00
if ( isPaletteTexture )
2010-10-19 16:24:27 -06:00
full_format = texformat | ( tlutfmt < < 16 ) ;
2011-12-26 15:14:12 -07:00
const u32 texture_size = TexDecoder_GetTextureSizeInBytes ( expandedWidth , expandedHeight , texformat ) ;
2015-09-07 09:05:47 -06:00
u32 additional_mips_size = 0 ; // not including level 0, which is texture_size
// GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain
// e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we limit the mipmap count to 6 there
tex_levels = std : : min < u32 > ( IntLog2 ( std : : max ( width , height ) ) + 1 , tex_levels ) ;
for ( u32 level = 1 ; level ! = tex_levels ; + + level )
{
// We still need to calculate the original size of the mips
const u32 expanded_mip_width = ROUND_UP ( CalculateLevelSize ( width , level ) , bsw ) ;
const u32 expanded_mip_height = ROUND_UP ( CalculateLevelSize ( height , level ) , bsh ) ;
additional_mips_size + = TexDecoder_GetTextureSizeInBytes ( expanded_mip_width , expanded_mip_height , texformat ) ;
}
// If we are recording a FifoLog, keep track of what memory we read.
// FifoRecorder does its own memory modification tracking independent of the texture hashing below.
if ( g_bRecordFifoData & & ! from_tmem )
FifoRecorder : : GetInstance ( ) . UseMemory ( address , texture_size + additional_mips_size , MemoryUpdate : : TEXTURE_MAP ) ;
2013-02-18 09:14:56 -07:00
2013-02-15 19:46:03 -07:00
const u8 * src_data ;
if ( from_tmem )
src_data = & texMem [ bpmem . tex [ stage / 4 ] . texImage1 [ stage % 4 ] . tmem_even * TMEM_LINE_SIZE ] ;
else
src_data = Memory : : GetPointer ( address ) ;
2011-12-26 15:14:12 -07:00
2013-01-10 07:12:21 -07:00
// TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data from the low tmem bank than it should)
2015-09-10 14:28:59 -06:00
base_hash = GetHash64 ( src_data , texture_size , g_ActiveConfig . iSafeTextureCache_ColorSamples ) ;
2014-12-22 04:53:03 -07:00
u32 palette_size = 0 ;
2011-12-26 14:04:59 -07:00
if ( isPaletteTexture )
2010-10-19 16:24:27 -06:00
{
2014-12-22 04:53:03 -07:00
palette_size = TexDecoder_GetPaletteSize ( texformat ) ;
2015-09-10 14:28:59 -06:00
full_hash = base_hash ^ GetHash64 ( & texMem [ tlutaddr ] , palette_size , g_ActiveConfig . iSafeTextureCache_ColorSamples ) ;
2015-02-22 10:42:19 -07:00
}
else
{
2015-09-10 14:28:59 -06:00
full_hash = base_hash ;
2011-12-26 13:11:31 -07:00
}
2010-10-24 09:16:31 -06:00
2015-02-22 10:42:19 -07:00
// Search the texture cache for textures by address
//
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar sitation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
// Find all texture cache entries for the current texture address, and decide whether to use one of
// them, or to create a new one
//
// In most cases, the fastest way is to use only one texture cache entry for the same address. Usually,
// when a texture changes, the old version of the texture is unlikely to be used again. If there were
// new cache entries created for normal texture updates, there would be a slowdown due to a huge amount
// of unused cache entries. Also thanks to texture pooling, overwriting an existing cache entry is
// faster than creating a new one from scratch.
//
// Some games use the same address for different textures though. If the same cache entry was used in
// this case, it would be constantly overwritten, and effectively there wouldn't be any caching for
// those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has multiple
// sets of fonts on each other stored in a single texture and uses the palette to make different
// characters visible or invisible. In Castlevania 3 some textures are used for 2 different things or
// at least in 2 different ways(size 1024x1024 vs 1024x256).
//
// To determine whether to use multiple cache entries or a single entry, use the following heuristic:
// If the same texture address is used several times during the same frame, assume the address is used
// for different purposes and allow creating an additional cache entry. If there's at least one entry
// that hasn't been used for the same frame, then overwrite it, in order to keep the cache as small as
// possible. If the current texture is found in the cache, use that entry.
//
// For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else it was
// done in vain.
2015-02-22 10:42:19 -07:00
std : : pair < TexCache : : iterator , TexCache : : iterator > iter_range = textures_by_address . equal_range ( ( u64 ) address ) ;
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar sitation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
TexCache : : iterator iter = iter_range . first ;
TexCache : : iterator oldest_entry = iter ;
int temp_frameCount = 0x7fffffff ;
2015-02-22 10:42:19 -07:00
TexCache : : iterator unconverted_copy = textures_by_address . end ( ) ;
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar sitation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
while ( iter ! = iter_range . second )
2011-12-26 13:11:31 -07:00
{
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar sitation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
TCacheEntryBase * entry = iter - > second ;
2015-10-11 12:11:12 -06:00
// Do not load strided EFB copies, they are not meant to be used directly
if ( entry - > IsEfbCopy ( ) & & entry - > native_width = = nativeW & & entry - > native_height = = nativeH & &
2015-10-19 10:00:29 -06:00
entry - > memory_stride = = entry - > BytesPerRow ( ) )
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar sitation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
{
2015-09-05 08:03:49 -06:00
// EFB copies have slightly different rules as EFB copy formats have different
// meanings from texture formats.
2015-09-10 14:28:59 -06:00
if ( ( base_hash = = entry - > hash & & ( ! isPaletteTexture | | g_Config . backend_info . bSupportsPaletteConversion ) ) | |
2015-09-06 08:31:32 -06:00
IsPlayingBackFifologWithBrokenEFBCopies )
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar sitation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
{
2015-01-26 16:33:23 -07:00
// TODO: We should check format/width/height/levels for EFB copies. Checking
// format is complicated because EFB copy formats don't exactly match
// texture formats. I'm not sure what effect checking width/height/levels
// would have.
2015-02-19 16:19:31 -07:00
if ( ! isPaletteTexture | | ! g_Config . backend_info . bSupportsPaletteConversion )
return ReturnEntry ( stage , entry ) ;
// Note that we found an unconverted EFB copy, then continue. We'll
// perform the conversion later. Currently, we only convert EFB copies to
// palette textures; we could do other conversions if it proved to be
// beneficial.
unconverted_copy = iter ;
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games which use paletted textures that are not EFB copies might be faster now, and might not require a higher texture cache accuracy anymore (similar situation to PR https://github.com/dolphin-emu/dolphin/pull/1916).
2015-02-01 07:36:19 -07:00
}
else
{
2015-02-19 16:19:31 -07:00
// Aggressively prune EFB copies: if it isn't useful here, it will probably
// never be useful again. It's theoretically possible for a game to do
// something weird where the copy could become useful in the future, but in
// practice it doesn't happen.
2015-06-09 13:45:09 -06:00
iter = FreeTexture ( iter ) ;
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games which use paletted textures that are not EFB copies might be faster now, and might not require a higher texture cache accuracy anymore (similar situation to PR https://github.com/dolphin-emu/dolphin/pull/1916).
2015-02-01 07:36:19 -07:00
continue ;
}
}
2015-01-26 16:33:23 -07:00
else
2010-10-19 16:24:27 -06:00
{
2015-01-26 16:33:23 -07:00
// For normal textures, all texture parameters need to match
2015-02-22 10:42:19 -07:00
if ( entry - > hash = = full_hash & & entry - > format = = full_format & & entry - > native_levels > = tex_levels & &
2015-01-26 16:33:23 -07:00
entry - > native_width = = nativeW & & entry - > native_height = = nativeH )
{
2015-06-29 19:19:19 -06:00
entry = DoPartialTextureUpdates ( iter ) ;
2015-06-13 07:51:58 -06:00
2015-01-26 16:33:23 -07:00
return ReturnEntry ( stage , entry ) ;
}
2010-10-19 16:24:27 -06:00
}
2011-12-26 11:45:13 -07:00
2015-09-12 00:59:14 -06:00
// Find the texture which hasn't been used for the longest time. Count paletted
// textures as the same texture here, when the texture itself is the same. This
// improves the performance a lot in some games that use paletted textures.
// Example: Sonic the Fighters (inside Sonic Gems Collection)
2015-10-11 12:11:12 -06:00
// Skip EFB copies here, so they can be used for partial texture updates
2015-09-12 00:59:14 -06:00
if ( entry - > frameCount ! = FRAMECOUNT_INVALID & & entry - > frameCount < temp_frameCount & &
2015-10-11 12:11:12 -06:00
! entry - > IsEfbCopy ( ) & & ! ( isPaletteTexture & & entry - > base_hash = = base_hash ) )
2011-12-26 11:45:13 -07:00
{
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games which use paletted textures that are not EFB copies might be faster now, and might not require a higher texture cache accuracy anymore (similar situation to PR https://github.com/dolphin-emu/dolphin/pull/1916).
2015-02-01 07:36:19 -07:00
temp_frameCount = entry - > frameCount ;
oldest_entry = iter ;
2011-12-26 11:45:13 -07:00
}
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games which use paletted textures that are not EFB copies might be faster now, and might not require a higher texture cache accuracy anymore (similar situation to PR https://github.com/dolphin-emu/dolphin/pull/1916).
2015-02-01 07:36:19 -07:00
+ + iter ;
}
2011-12-26 11:45:13 -07:00
2015-02-22 10:42:19 -07:00
if ( unconverted_copy ! = textures_by_address . end ( ) )
2015-02-19 16:19:31 -07:00
{
// Perform palette decoding.
TCacheEntryBase * entry = unconverted_copy - > second ;
2015-02-19 16:53:52 -07:00
TCacheEntryConfig config ;
config . rendertarget = true ;
config . width = entry - > config . width ;
config . height = entry - > config . height ;
config . layers = FramebufferManagerBase : : GetEFBLayers ( ) ;
TCacheEntryBase * decoded_entry = AllocateTexture ( config ) ;
2015-02-19 16:19:31 -07:00
decoded_entry - > SetGeneralParameters ( address , texture_size , full_format ) ;
decoded_entry - > SetDimensions ( entry - > native_width , entry - > native_height , 1 ) ;
2015-09-10 14:28:59 -06:00
decoded_entry - > SetHashes ( base_hash , full_hash ) ;
2015-02-19 16:19:31 -07:00
decoded_entry - > frameCount = FRAMECOUNT_INVALID ;
decoded_entry - > is_efb_copy = false ;
g_texture_cache - > ConvertTexture ( decoded_entry , entry , & texMem [ tlutaddr ] , ( TlutFormat ) tlutfmt ) ;
2015-06-28 17:08:28 -06:00
textures_by_address . emplace ( ( u64 ) address , decoded_entry ) ;
2015-02-19 16:19:31 -07:00
return ReturnEntry ( stage , decoded_entry ) ;
}
2015-02-22 10:42:19 -07:00
// Search the texture cache for normal textures by hash
//
// If the texture was fully hashed, the address does not need to match. Identical duplicate textures cause unnecessary slowdowns
// Example: Tales of Symphonia (GC) uses over 500 small textures in menus, but only around 70 different ones
if ( g_ActiveConfig . iSafeTextureCache_ColorSamples = = 0 | |
std : : max ( texture_size , palette_size ) < = ( u32 ) g_ActiveConfig . iSafeTextureCache_ColorSamples * 8 )
{
iter_range = textures_by_hash . equal_range ( full_hash ) ;
iter = iter_range . first ;
while ( iter ! = iter_range . second )
{
TCacheEntryBase * entry = iter - > second ;
// All parameters, except the address, need to match here
if ( entry - > format = = full_format & & entry - > native_levels > = tex_levels & &
entry - > native_width = = nativeW & & entry - > native_height = = nativeH )
{
2015-06-29 19:19:19 -06:00
entry = DoPartialTextureUpdates ( iter ) ;
2015-06-13 07:51:58 -06:00
2015-02-22 10:42:19 -07:00
return ReturnEntry ( stage , entry ) ;
}
+ + iter ;
}
}
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games which use paletted textures that are not EFB copies might be faster now, and might not require a higher texture cache accuracy anymore (similar situation to PR https://github.com/dolphin-emu/dolphin/pull/1916).
2015-02-01 07:36:19 -07:00
// If at least one entry was not used for the same frame, overwrite the oldest one
if ( temp_frameCount ! = 0x7fffffff )
{
2015-01-17 03:18:57 -07:00
// pool this texture and make a new one later
2015-06-09 13:45:09 -06:00
FreeTexture ( oldest_entry ) ;
2010-10-19 16:24:27 -06:00
}
2011-12-26 11:17:04 -07:00
2015-03-01 15:53:15 -07:00
std : : shared_ptr < HiresTexture > hires_tex ;
2010-10-19 16:24:27 -06:00
if ( g_ActiveConfig . bHiresTextures )
{
2015-03-01 15:53:15 -07:00
hires_tex = HiresTexture : : Search (
2014-12-22 04:53:03 -07:00
src_data , texture_size ,
& texMem [ tlutaddr ] , palette_size ,
width , height ,
2015-01-15 13:33:22 -07:00
texformat , use_mipmaps
2015-03-01 15:53:15 -07:00
) ;
2014-12-22 04:53:03 -07:00
if ( hires_tex )
2010-10-19 16:24:27 -06:00
{
2014-12-22 04:53:03 -07:00
auto & l = hires_tex - > m_levels [ 0 ] ;
if ( l . width ! = width | | l . height ! = height )
2012-12-19 08:55:44 -07:00
{
2014-12-22 04:53:03 -07:00
width = l . width ;
height = l . height ;
2012-12-19 08:55:44 -07:00
}
2014-12-22 04:53:03 -07:00
expandedWidth = l . width ;
expandedHeight = l . height ;
CheckTempSize ( l . data_size ) ;
memcpy ( temp , l . data , l . data_size ) ;
2010-10-19 16:24:27 -06:00
}
}
2014-12-22 04:53:03 -07:00
if ( ! hires_tex )
2013-01-10 07:12:21 -07:00
{
if ( ! ( texformat = = GX_TF_RGBA8 & & from_tmem ) )
{
2014-08-10 13:28:42 -06:00
const u8 * tlut = & texMem [ tlutaddr ] ;
2015-06-29 19:19:19 -06:00
TexDecoder_Decode ( temp , src_data , expandedWidth , expandedHeight , texformat , tlut , ( TlutFormat ) tlutfmt ) ;
2013-01-10 07:12:21 -07:00
}
else
{
2015-02-15 12:43:31 -07:00
u8 * src_data_gb = & texMem [ bpmem . tex [ stage / 4 ] . texImage2 [ stage % 4 ] . tmem_odd * TMEM_LINE_SIZE ] ;
2015-01-17 01:46:31 -07:00
TexDecoder_DecodeRGBA8FromTmem ( temp , src_data , src_data_gb , expandedWidth , expandedHeight ) ;
2013-01-10 07:12:21 -07:00
}
}
2010-10-19 16:24:27 -06:00
2015-03-01 05:04:48 -07:00
// how many levels the allocated texture shall have
const u32 texLevels = hires_tex ? ( u32 ) hires_tex - > m_levels . size ( ) : tex_levels ;
2010-10-19 16:24:27 -06:00
// create the entry/texture
2015-01-17 03:18:57 -07:00
TCacheEntryConfig config ;
config . width = width ;
config . height = height ;
config . levels = texLevels ;
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games which use paletted textures that are not EFB copies might be faster now, and might not require a higher texture cache accuracy anymore (similar situation to PR https://github.com/dolphin-emu/dolphin/pull/1916).
2015-02-01 07:36:19 -07:00
TCacheEntryBase * entry = AllocateTexture ( config ) ;
2015-01-17 03:18:57 -07:00
GFX_DEBUGGER_PAUSE_AT ( NEXT_NEW_TEXTURE , true ) ;
2010-10-19 16:24:27 -06:00
2015-06-29 19:19:19 -06:00
iter = textures_by_address . emplace ( ( u64 ) address , entry ) ;
2015-02-22 10:42:19 -07:00
if ( g_ActiveConfig . iSafeTextureCache_ColorSamples = = 0 | |
std : : max ( texture_size , palette_size ) < = ( u32 ) g_ActiveConfig . iSafeTextureCache_ColorSamples * 8 )
{
2015-06-28 17:08:28 -06:00
entry - > textures_by_hash_iter = textures_by_hash . emplace ( full_hash , entry ) ;
2015-02-22 10:42:19 -07:00
}
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games which use paletted textures that are not EFB copies might be faster now, and might not require a higher texture cache accuracy anymore (similar situation to PR https://github.com/dolphin-emu/dolphin/pull/1916).
2015-02-01 07:36:19 -07:00
2015-02-19 16:19:31 -07:00
entry - > SetGeneralParameters ( address , texture_size , full_format ) ;
2015-01-11 14:41:04 -07:00
entry - > SetDimensions ( nativeW , nativeH , tex_levels ) ;
2015-09-10 14:28:59 -06:00
entry - > SetHashes ( base_hash , full_hash ) ;
2015-02-19 16:19:31 -07:00
entry - > is_efb_copy = false ;
2015-03-01 05:04:48 -07:00
entry - > is_custom_tex = hires_tex ! = nullptr ;
2013-10-28 23:23:17 -06:00
2014-05-19 14:35:53 -06:00
// load texture
entry - > Load ( width , height , expandedWidth , 0 ) ;
2014-12-22 14:33:38 -07:00
std : : string basename = " " ;
2014-12-22 04:53:03 -07:00
if ( g_ActiveConfig . bDumpTextures & & ! hires_tex )
2014-12-22 14:33:38 -07:00
{
basename = HiresTexture : : GenBaseName (
src_data , texture_size ,
& texMem [ tlutaddr ] , palette_size ,
width , height ,
2015-01-15 13:33:22 -07:00
texformat , use_mipmaps ,
true
2014-12-22 14:33:38 -07:00
) ;
DumpTexture ( entry , basename , 0 ) ;
}
2012-05-12 05:50:03 -06:00
2015-03-01 05:04:48 -07:00
if ( hires_tex )
2010-10-19 16:24:27 -06:00
{
2015-03-01 05:04:48 -07:00
for ( u32 level = 1 ; level ! = texLevels ; + + level )
{
auto & l = hires_tex - > m_levels [ level ] ;
CheckTempSize ( l . data_size ) ;
memcpy ( temp , l . data , l . data_size ) ;
entry - > Load ( l . width , l . height , l . width , level ) ;
}
}
else
{
// load mips - TODO: Loading mipmaps from tmem is untested!
2015-01-17 01:46:31 -07:00
src_data + = texture_size ;
const u8 * ptr_even = nullptr ;
const u8 * ptr_odd = nullptr ;
if ( from_tmem )
2012-01-29 13:49:50 -07:00
{
2015-02-15 12:43:31 -07:00
ptr_even = & texMem [ bpmem . tex [ stage / 4 ] . texImage1 [ stage % 4 ] . tmem_even * TMEM_LINE_SIZE + texture_size ] ;
ptr_odd = & texMem [ bpmem . tex [ stage / 4 ] . texImage2 [ stage % 4 ] . tmem_odd * TMEM_LINE_SIZE ] ;
2015-01-17 01:46:31 -07:00
}
2013-10-28 23:23:17 -06:00
2015-03-01 05:04:48 -07:00
for ( u32 level = 1 ; level ! = texLevels ; + + level )
2015-01-17 01:46:31 -07:00
{
const u32 mip_width = CalculateLevelSize ( width , level ) ;
const u32 mip_height = CalculateLevelSize ( height , level ) ;
2015-09-06 04:07:32 -06:00
const u32 expanded_mip_width = ROUND_UP ( mip_width , bsw ) ;
const u32 expanded_mip_height = ROUND_UP ( mip_height , bsh ) ;
2015-01-17 01:46:31 -07:00
const u8 * & mip_src_data = from_tmem
? ( ( level % 2 ) ? ptr_odd : ptr_even )
: src_data ;
const u8 * tlut = & texMem [ tlutaddr ] ;
2015-02-15 12:43:31 -07:00
TexDecoder_Decode ( temp , mip_src_data , expanded_mip_width , expanded_mip_height , texformat , tlut , ( TlutFormat ) tlutfmt ) ;
2015-01-17 01:46:31 -07:00
mip_src_data + = TexDecoder_GetTextureSizeInBytes ( expanded_mip_width , expanded_mip_height , texformat ) ;
2012-05-12 05:50:03 -06:00
2015-01-17 01:46:31 -07:00
entry - > Load ( mip_width , mip_height , expanded_mip_width , level ) ;
if ( g_ActiveConfig . bDumpTextures )
DumpTexture ( entry , basename , level ) ;
2010-10-19 16:24:27 -06:00
}
2015-01-17 01:46:31 -07:00
}
2010-10-19 16:24:27 -06:00
2015-01-17 02:29:10 -07:00
INCSTAT ( stats . numTexturesUploaded ) ;
2015-02-22 10:42:19 -07:00
SETSTAT ( stats . numTexturesAlive , textures_by_address . size ( ) ) ;
2010-10-19 16:24:27 -06:00
2015-06-29 19:19:19 -06:00
entry = DoPartialTextureUpdates ( iter ) ;
2015-06-13 07:51:58 -06:00
2013-02-15 19:46:03 -07:00
return ReturnEntry ( stage , entry ) ;
2010-10-19 16:24:27 -06:00
}
2015-08-31 08:41:16 -06:00
void TextureCache : : CopyRenderTargetToTexture ( u32 dstAddr , unsigned int dstFormat , u32 dstStride , PEControl : : PixelFormat srcFormat ,
2011-02-26 16:41:02 -07:00
const EFBRectangle & srcRect , bool isIntensity , bool scaleByHalf )
2010-10-19 16:24:27 -06:00
{
2011-12-26 15:14:12 -07:00
// Emulation methods:
2013-10-28 23:23:17 -06:00
//
2011-12-26 15:14:12 -07:00
// - EFB to RAM:
2014-02-16 21:51:41 -07:00
// Encodes the requested EFB data at its native resolution to the emulated RAM using shaders.
// Load() decodes the data from there again (using TextureDecoder) if the EFB copy is being used as a texture again.
// Advantage: CPU can read data from the EFB copy and we don't lose any important updates to the texture
// Disadvantage: Encoding+decoding steps often are redundant because only some games read or modify EFB copies before using them as textures.
2013-10-28 23:23:17 -06:00
//
2011-12-26 15:14:12 -07:00
// - EFB to texture:
2014-02-16 21:51:41 -07:00
// Copies the requested EFB data to a texture object in VRAM, performing any color conversion using shaders.
// Advantage: Works for many games, since in most cases EFB copies aren't read or modified at all before being used as a texture again.
// Since we don't do any further encoding or decoding here, this method is much faster.
// It also allows enhancing the visual quality by doing scaled EFB copies.
2013-10-28 23:23:17 -06:00
//
2013-04-24 07:21:54 -06:00
// - Hybrid EFB copies:
2014-02-16 21:51:41 -07:00
// 1a) Whenever this function gets called, encode the requested EFB data to RAM (like EFB to RAM)
// 1b) Set type to TCET_EC_DYNAMIC for all texture cache entries in the destination address range.
// If EFB copy caching is enabled, further checks will (try to) prevent redundant EFB copies.
// 2) Check if a texture cache entry for the specified dstAddr already exists (i.e. if an EFB copy was triggered to that address before):
// 2a) Entry doesn't exist:
// - Also copy the requested EFB data to a texture object in VRAM (like EFB to texture)
// - Create a texture cache entry for the target (type = TCET_EC_VRAM)
// - Store a hash of the encoded RAM data in the texcache entry.
// 2b) Entry exists AND type is TCET_EC_VRAM:
// - Like case 2a, but reuse the old texcache entry instead of creating a new one.
// 2c) Entry exists AND type is TCET_EC_DYNAMIC:
// - Only encode the texture to RAM (like EFB to RAM) and store a hash of the encoded data in the existing texcache entry.
// - Do NOT copy the requested EFB data to a VRAM object. Reason: the texture is dynamic, i.e. the CPU is modifying it. Storing a VRAM copy is useless, because we'd always end up deleting it and reloading the data from RAM anyway.
// 3) If the EFB copy gets used as a texture, compare the source RAM hash with the hash you stored when encoding the EFB data to RAM.
// 3a) If the two hashes match AND type is TCET_EC_VRAM, reuse the VRAM copy you created
// 3b) If the two hashes differ AND type is TCET_EC_VRAM, screw your existing VRAM copy. Set type to TCET_EC_DYNAMIC.
// Redecode the source RAM data to a VRAM object. The entry basically behaves like a normal texture now.
// 3c) If type is TCET_EC_DYNAMIC, treat the EFB copy like a normal texture.
// Advantage: Non-dynamic EFB copies can be visually enhanced like with EFB to texture.
// Compatibility is as good as EFB to RAM.
// Disadvantage: Slower than EFB to texture and often even slower than EFB to RAM.
// EFB copy cache depends on accurate texture hashing being enabled. However, with accurate hashing you end up being as slow as without a copy cache anyway.
2011-12-26 15:14:12 -07:00
//
// Disadvantage of all methods: Calling this function requires the GPU to perform a pipeline flush which stalls any further CPU processing.
2012-01-24 08:38:27 -07:00
//
// For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies.
2011-12-26 15:14:12 -07:00
2015-09-07 09:05:47 -06:00
float colmat [ 28 ] = { 0 } ;
2010-10-19 16:24:27 -06:00
float * const fConstAdd = colmat + 16 ;
2011-01-07 12:23:57 -07:00
float * const ColorMask = colmat + 20 ;
2011-12-10 08:08:26 -07:00
ColorMask [ 0 ] = ColorMask [ 1 ] = ColorMask [ 2 ] = ColorMask [ 3 ] = 255.0f ;
2011-01-07 12:23:57 -07:00
ColorMask [ 4 ] = ColorMask [ 5 ] = ColorMask [ 6 ] = ColorMask [ 7 ] = 1.0f / 255.0f ;
2010-10-19 16:24:27 -06:00
unsigned int cbufid = - 1 ;
2014-03-23 14:44:23 -06:00
bool efbHasAlpha = bpmem . zcontrol . pixel_format = = PEControl : : RGBA6_Z24 ;
2010-10-19 16:24:27 -06:00
2014-03-23 14:44:23 -06:00
if ( srcFormat = = PEControl : : Z24 )
2010-10-19 16:24:27 -06:00
{
2011-02-26 16:41:02 -07:00
switch ( dstFormat )
2010-10-19 16:24:27 -06:00
{
2011-12-10 08:08:26 -07:00
case 0 : // Z4
2011-01-07 12:23:57 -07:00
colmat [ 3 ] = colmat [ 7 ] = colmat [ 11 ] = colmat [ 15 ] = 1.0f ;
cbufid = 0 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
break ;
2015-09-04 08:45:29 -06:00
case 8 : // Z8H
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
case 1 : // Z8
colmat [ 0 ] = colmat [ 4 ] = colmat [ 8 ] = colmat [ 12 ] = 1.0f ;
cbufid = 1 ;
break ;
2011-12-10 08:08:26 -07:00
case 3 : // Z16
2011-01-07 12:23:57 -07:00
colmat [ 1 ] = colmat [ 5 ] = colmat [ 9 ] = colmat [ 12 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 2 ;
2011-01-07 12:23:57 -07:00
break ;
case 11 : // Z16 (reverse order)
colmat [ 0 ] = colmat [ 4 ] = colmat [ 8 ] = colmat [ 13 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 3 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
break ;
case 6 : // Z24X8
colmat [ 0 ] = colmat [ 5 ] = colmat [ 10 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 4 ;
2011-01-07 12:23:57 -07:00
break ;
case 9 : // Z8M
colmat [ 1 ] = colmat [ 5 ] = colmat [ 9 ] = colmat [ 13 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 5 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
break ;
case 10 : // Z8L
colmat [ 2 ] = colmat [ 6 ] = colmat [ 10 ] = colmat [ 14 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 6 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
break ;
2011-12-26 10:05:01 -07:00
case 12 : // Z16L - copy lower 16 depth bits
// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits stored as alpha)
// Used e.g. in Zelda: Skyward Sword
2011-12-10 08:08:26 -07:00
colmat [ 1 ] = colmat [ 5 ] = colmat [ 9 ] = colmat [ 14 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 7 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
break ;
default :
2011-02-26 16:41:02 -07:00
ERROR_LOG ( VIDEO , " Unknown copy zbuf format: 0x%x " , dstFormat ) ;
2011-01-07 12:23:57 -07:00
colmat [ 2 ] = colmat [ 5 ] = colmat [ 8 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 8 ;
2011-01-07 12:23:57 -07:00
break ;
2010-10-19 16:24:27 -06:00
}
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_ZTF ;
2010-10-19 16:24:27 -06:00
}
2013-10-28 23:23:17 -06:00
else if ( isIntensity )
2010-10-19 16:24:27 -06:00
{
2015-02-15 12:43:31 -07:00
fConstAdd [ 0 ] = fConstAdd [ 1 ] = fConstAdd [ 2 ] = 16.0f / 255.0f ;
2013-10-28 23:23:17 -06:00
switch ( dstFormat )
2010-10-19 16:24:27 -06:00
{
case 0 : // I4
case 1 : // I8
case 2 : // IA4
case 3 : // IA8
2010-12-18 11:23:22 -07:00
case 8 : // I8
2010-10-19 16:24:27 -06:00
// TODO - verify these coefficients
colmat [ 0 ] = 0.257f ; colmat [ 1 ] = 0.504f ; colmat [ 2 ] = 0.098f ;
colmat [ 4 ] = 0.257f ; colmat [ 5 ] = 0.504f ; colmat [ 6 ] = 0.098f ;
colmat [ 8 ] = 0.257f ; colmat [ 9 ] = 0.504f ; colmat [ 10 ] = 0.098f ;
2013-10-28 23:23:17 -06:00
if ( dstFormat < 2 | | dstFormat = = 8 )
2010-10-19 16:24:27 -06:00
{
colmat [ 12 ] = 0.257f ; colmat [ 13 ] = 0.504f ; colmat [ 14 ] = 0.098f ;
2015-02-15 12:43:31 -07:00
fConstAdd [ 3 ] = 16.0f / 255.0f ;
2011-02-26 16:41:02 -07:00
if ( dstFormat = = 0 )
2011-01-07 12:23:57 -07:00
{
ColorMask [ 0 ] = ColorMask [ 1 ] = ColorMask [ 2 ] = 15.0f ;
ColorMask [ 4 ] = ColorMask [ 5 ] = ColorMask [ 6 ] = 1.0f / 15.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 9 ;
2011-01-07 12:23:57 -07:00
}
else
{
2013-11-23 20:43:54 -07:00
cbufid = 10 ;
2013-04-24 07:21:54 -06:00
}
2010-10-19 16:24:27 -06:00
}
else // alpha
{
colmat [ 15 ] = 1 ;
2011-02-26 16:41:02 -07:00
if ( dstFormat = = 2 )
2011-01-07 12:23:57 -07:00
{
ColorMask [ 0 ] = ColorMask [ 1 ] = ColorMask [ 2 ] = ColorMask [ 3 ] = 15.0f ;
ColorMask [ 4 ] = ColorMask [ 5 ] = ColorMask [ 6 ] = ColorMask [ 7 ] = 1.0f / 15.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 11 ;
2011-01-07 12:23:57 -07:00
}
else
{
2013-11-23 20:43:54 -07:00
cbufid = 12 ;
2011-01-07 12:23:57 -07:00
}
2013-10-28 23:23:17 -06:00
2010-10-19 16:24:27 -06:00
}
break ;
default :
2011-02-26 16:41:02 -07:00
ERROR_LOG ( VIDEO , " Unknown copy intensity format: 0x%x " , dstFormat ) ;
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 5 ] = colmat [ 10 ] = colmat [ 15 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 13 ;
2010-10-19 16:24:27 -06:00
break ;
}
}
else
{
2013-10-28 23:23:17 -06:00
switch ( dstFormat )
2010-10-19 16:24:27 -06:00
{
case 0 : // R4
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 4 ] = colmat [ 8 ] = colmat [ 12 ] = 1 ;
ColorMask [ 0 ] = 15.0f ;
ColorMask [ 4 ] = 1.0f / 15.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 14 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
break ;
2010-12-18 11:23:22 -07:00
case 1 : // R8
2010-10-19 16:24:27 -06:00
case 8 : // R8
colmat [ 0 ] = colmat [ 4 ] = colmat [ 8 ] = colmat [ 12 ] = 1 ;
2013-11-23 20:43:54 -07:00
cbufid = 15 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2010-10-19 16:24:27 -06:00
break ;
case 2 : // RA4
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 4 ] = colmat [ 8 ] = colmat [ 15 ] = 1.0f ;
ColorMask [ 0 ] = ColorMask [ 3 ] = 15.0f ;
ColorMask [ 4 ] = ColorMask [ 7 ] = 1.0f / 15.0f ;
2014-03-29 04:05:44 -06:00
2013-11-23 20:43:54 -07:00
cbufid = 16 ;
2014-08-30 14:51:27 -06:00
if ( ! efbHasAlpha )
{
2013-11-06 14:19:37 -07:00
ColorMask [ 3 ] = 0.0f ;
fConstAdd [ 3 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 17 ;
2013-11-06 14:19:37 -07:00
}
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2011-01-07 12:23:57 -07:00
break ;
2010-10-19 16:24:27 -06:00
case 3 : // RA8
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 4 ] = colmat [ 8 ] = colmat [ 15 ] = 1.0f ;
2014-03-29 04:05:44 -06:00
2013-11-23 20:43:54 -07:00
cbufid = 18 ;
2014-08-30 14:51:27 -06:00
if ( ! efbHasAlpha )
{
2013-11-06 14:19:37 -07:00
ColorMask [ 3 ] = 0.0f ;
fConstAdd [ 3 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 19 ;
2013-11-06 14:19:37 -07:00
}
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2010-10-19 16:24:27 -06:00
break ;
case 7 : // A8
2011-01-07 12:23:57 -07:00
colmat [ 3 ] = colmat [ 7 ] = colmat [ 11 ] = colmat [ 15 ] = 1.0f ;
2014-03-29 04:05:44 -06:00
2013-11-23 20:43:54 -07:00
cbufid = 20 ;
2014-08-30 14:51:27 -06:00
if ( ! efbHasAlpha )
{
2013-11-06 14:19:37 -07:00
ColorMask [ 3 ] = 0.0f ;
fConstAdd [ 0 ] = 1.0f ;
fConstAdd [ 1 ] = 1.0f ;
fConstAdd [ 2 ] = 1.0f ;
fConstAdd [ 3 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 21 ;
2013-11-06 14:19:37 -07:00
}
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2010-10-19 16:24:27 -06:00
break ;
case 9 : // G8
2011-01-07 12:23:57 -07:00
colmat [ 1 ] = colmat [ 5 ] = colmat [ 9 ] = colmat [ 13 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 22 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2010-10-19 16:24:27 -06:00
break ;
case 10 : // B8
2011-01-07 12:23:57 -07:00
colmat [ 2 ] = colmat [ 6 ] = colmat [ 10 ] = colmat [ 14 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 23 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2010-10-19 16:24:27 -06:00
break ;
case 11 : // RG8
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 4 ] = colmat [ 8 ] = colmat [ 13 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 24 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2010-10-19 16:24:27 -06:00
break ;
case 12 : // GB8
2013-03-19 19:51:12 -06:00
colmat [ 1 ] = colmat [ 5 ] = colmat [ 9 ] = colmat [ 14 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 25 ;
2015-09-04 08:45:29 -06:00
dstFormat | = _GX_TF_CTF ;
2010-10-19 16:24:27 -06:00
break ;
case 4 : // RGB565
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 5 ] = colmat [ 10 ] = 1.0f ;
ColorMask [ 0 ] = ColorMask [ 2 ] = 31.0f ;
ColorMask [ 4 ] = ColorMask [ 6 ] = 1.0f / 31.0f ;
ColorMask [ 1 ] = 63.0f ;
ColorMask [ 5 ] = 1.0f / 63.0f ;
fConstAdd [ 3 ] = 1.0f ; // set alpha to 1
2013-11-23 20:43:54 -07:00
cbufid = 26 ;
2010-10-19 16:24:27 -06:00
break ;
case 5 : // RGB5A3
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 5 ] = colmat [ 10 ] = colmat [ 15 ] = 1.0f ;
ColorMask [ 0 ] = ColorMask [ 1 ] = ColorMask [ 2 ] = 31.0f ;
ColorMask [ 4 ] = ColorMask [ 5 ] = ColorMask [ 6 ] = 1.0f / 31.0f ;
ColorMask [ 3 ] = 7.0f ;
ColorMask [ 7 ] = 1.0f / 7.0f ;
2014-03-29 04:05:44 -06:00
2013-11-23 20:43:54 -07:00
cbufid = 27 ;
2014-08-30 14:51:27 -06:00
if ( ! efbHasAlpha )
{
2013-11-06 14:19:37 -07:00
ColorMask [ 3 ] = 0.0f ;
fConstAdd [ 3 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 28 ;
2013-11-06 14:19:37 -07:00
}
2011-01-07 12:23:57 -07:00
break ;
2010-10-19 16:24:27 -06:00
case 6 : // RGBA8
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 5 ] = colmat [ 10 ] = colmat [ 15 ] = 1.0f ;
2014-03-29 04:05:44 -06:00
2013-11-23 20:43:54 -07:00
cbufid = 29 ;
2014-08-30 14:51:27 -06:00
if ( ! efbHasAlpha )
{
2013-11-06 14:19:37 -07:00
ColorMask [ 3 ] = 0.0f ;
fConstAdd [ 3 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 30 ;
2013-11-06 14:19:37 -07:00
}
2010-10-19 16:24:27 -06:00
break ;
default :
2011-02-26 16:41:02 -07:00
ERROR_LOG ( VIDEO , " Unknown copy color format: 0x%x " , dstFormat ) ;
2011-01-07 12:23:57 -07:00
colmat [ 0 ] = colmat [ 5 ] = colmat [ 10 ] = colmat [ 15 ] = 1.0f ;
2013-11-23 20:43:54 -07:00
cbufid = 31 ;
2010-10-19 16:24:27 -06:00
break ;
}
}
2015-09-04 08:45:29 -06:00
u8 * dst = Memory : : GetPointer ( dstAddr ) ;
if ( dst = = nullptr )
{
ERROR_LOG ( VIDEO , " Trying to copy from EFB to invalid address 0x%8x " , dstAddr ) ;
return ;
}
2015-02-15 12:43:31 -07:00
const unsigned int tex_w = scaleByHalf ? srcRect . GetWidth ( ) / 2 : srcRect . GetWidth ( ) ;
const unsigned int tex_h = scaleByHalf ? srcRect . GetHeight ( ) / 2 : srcRect . GetHeight ( ) ;
2010-10-19 16:24:27 -06:00
2010-12-10 08:54:14 -07:00
unsigned int scaled_tex_w = g_ActiveConfig . bCopyEFBScaled ? Renderer : : EFBToScaledX ( tex_w ) : tex_w ;
unsigned int scaled_tex_h = g_ActiveConfig . bCopyEFBScaled ? Renderer : : EFBToScaledY ( tex_h ) : tex_h ;
2010-10-19 16:24:27 -06:00
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar situation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
// remove all texture cache entries at dstAddr
{
2015-09-05 08:03:49 -06:00
std : : pair < TexCache : : iterator , TexCache : : iterator > iter_range = textures_by_address . equal_range ( ( u64 ) dstAddr ) ;
TexCache : : iterator iter = iter_range . first ;
while ( iter ! = iter_range . second )
{
iter = FreeTexture ( iter ) ;
}
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar situation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
}
2010-10-19 16:24:27 -06:00
2015-01-17 03:18:57 -07:00
// create the texture
TCacheEntryConfig config ;
config . rendertarget = true ;
config . width = scaled_tex_w ;
config . height = scaled_tex_h ;
config . layers = FramebufferManagerBase : : GetEFBLayers ( ) ;
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar situation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
TCacheEntryBase * entry = AllocateTexture ( config ) ;
2015-01-17 03:18:57 -07:00
2015-02-19 16:19:31 -07:00
entry - > SetGeneralParameters ( dstAddr , 0 , dstFormat ) ;
2015-01-17 03:18:57 -07:00
entry - > SetDimensions ( tex_w , tex_h , 1 ) ;
2010-10-19 16:24:27 -06:00
2014-05-19 10:31:38 -06:00
entry - > frameCount = FRAMECOUNT_INVALID ;
2015-09-04 08:45:29 -06:00
entry - > SetEfbCopy ( dstStride ) ;
2015-03-01 05:04:48 -07:00
entry - > is_custom_tex = false ;
2010-10-19 16:24:27 -06:00
2015-09-04 08:45:29 -06:00
entry - > FromRenderTarget ( dst , dstFormat , dstStride , srcFormat , srcRect , isIntensity , scaleByHalf , cbufid , colmat ) ;
2015-09-13 05:30:56 -06:00
u64 hash = entry - > CalculateHash ( ) ;
2015-09-10 14:28:59 -06:00
entry - > SetHashes ( hash , hash ) ;
2015-09-04 08:45:29 -06:00
2015-09-05 09:13:44 -06:00
// Invalidate all textures that overlap the range of our efb copy.
// Unless our efb copy has a weird stride, then we want to avoid invalidating textures which
// we might be able to do a partial texture update on.
2015-10-19 10:00:29 -06:00
if ( entry - > memory_stride = = entry - > BytesPerRow ( ) )
2015-09-05 08:03:49 -06:00
{
TexCache : : iterator iter = textures_by_address . begin ( ) ;
2015-09-04 08:45:29 -06:00
while ( iter ! = textures_by_address . end ( ) )
{
if ( iter - > second - > OverlapsMemoryRange ( dstAddr , entry - > size_in_bytes ) )
iter = FreeTexture ( iter ) ;
else
+ + iter ;
}
}
Allow multiple texture cache entries for textures at the same address
This is the same trick which is used for Metroid's fonts/texts, but for all textures. If 2 different textures at the same address are loaded during the same frame, create a 2nd entry instead of overwriting the existing one. If the entry was overwritten in this case, there wouldn't be any caching, which results in a big performance drop.
The restriction to textures, which are loaded during the same frame, prevents creating lots of textures when textures are used in the regular way. This restriction is new. Overwriting textures, instead of creating new ones is faster, if the old ones are unlikely to be used again.
Since this would break efb copies, don't do it for efb copies.
Castlevania 3 goes from 80 fps to 115 fps for me.
There might be games that need a higher texture cache accuracy with this, but those games should also see a performance boost from this PR.
Some games, which use paletted textures, which are not efb copies, might be faster now. And also not require a higher texture cache accuracy anymore. (similar situation as PR https://github.com/dolphin-emu/dolphin/pull/1916)
2015-02-01 07:36:19 -07:00
2015-05-02 04:10:40 -06:00
if ( g_ActiveConfig . bDumpEFBTarget )
{
static int count = 0 ;
entry - > Save ( StringFromFormat ( " %sefb_frame_%i.png " , File : : GetUserPath ( D_DUMPTEXTURES_IDX ) . c_str ( ) ,
count + + ) , 0 ) ;
}
2015-09-07 09:05:47 -06:00
if ( g_bRecordFifoData )
{
// Mark the memory behind this efb copy as dynamically generated for the Fifo log
u32 address = dstAddr ;
for ( u32 i = 0 ; i < entry - > NumBlocksY ( ) ; i + + )
{
2015-10-19 10:00:29 -06:00
FifoRecorder : : GetInstance ( ) . UseMemory ( address , entry - > BytesPerRow ( ) , MemoryUpdate : : TEXTURE_MAP , true ) ;
2015-09-07 09:05:47 -06:00
address + = entry - > memory_stride ;
}
}
2015-06-28 17:08:28 -06:00
textures_by_address . emplace ( ( u64 ) dstAddr , entry ) ;
2010-12-27 15:17:19 -07:00
}
2014-06-11 17:04:42 -06:00
2015-01-17 02:29:10 -07:00
// Hands out a texture entry for `config`.
// An entry with a matching config is taken from texture_pool when one is
// available; otherwise a new one is created through g_texture_cache and the
// numTexturesCreated statistic is bumped.
// The returned entry is never linked into textures_by_hash.
TextureCache::TCacheEntryBase* TextureCache::AllocateTexture(const TCacheEntryConfig& config)
{
	TCacheEntryBase* entry = nullptr;

	const TexPool::iterator pooled = texture_pool.find(config);
	if (pooled == texture_pool.end())
	{
		// Nothing reusable in the pool: create a brand new texture.
		entry = g_texture_cache->CreateTexture(config);
		INCSTAT(stats.numTexturesCreated);
	}
	else
	{
		// Reuse the pooled entry and take it out of the pool so it cannot be
		// handed out a second time.
		entry = pooled->second;
		texture_pool.erase(pooled);
	}

	// textures_by_hash.end() is the "not in textures_by_hash" marker that
	// FreeTexture tests for.
	entry->textures_by_hash_iter = textures_by_hash.end();
	return entry;
}
2015-06-09 13:45:09 -06:00
// Retires the cache entry that `iter` points at in textures_by_address.
// The entry is not destroyed: it is unlinked from textures_by_hash (when it was
// linked), its frameCount is reset, and it is placed in texture_pool under its
// config so AllocateTexture can hand it out again.
// Returns the result of textures_by_address.erase(iter), which lets callers
// continue iterating while freeing.
TextureCache::TexCache::iterator TextureCache::FreeTexture(TexCache::iterator iter)
{
	TCacheEntryBase* const freed = iter->second;

	// Park the entry in the pool for later reuse.
	freed->frameCount = FRAMECOUNT_INVALID;
	texture_pool.emplace(freed->config, freed);

	// An iterator different from end() means the entry is still referenced by
	// textures_by_hash; drop that reference and reset the marker.
	const bool linked_by_hash = freed->textures_by_hash_iter != textures_by_hash.end();
	if (linked_by_hash)
	{
		textures_by_hash.erase(freed->textures_by_hash_iter);
		freed->textures_by_hash_iter = textures_by_hash.end();
	}

	return textures_by_address.erase(iter);
}
2015-09-04 08:45:29 -06:00
2015-10-19 10:00:29 -06:00
// Returns the number of bytes taken by one row of blocks of this texture:
// the block count across the (padded) native width times the size of a block.
u32 TextureCache::TCacheEntryBase::BytesPerRow() const
{
	const u32 block_width = TexDecoder_GetBlockWidthInTexels(format);

	// Round the native width up to a multiple of the block width.
	const u32 padded_width = ROUND_UP(native_width, block_width);
	const u32 blocks_per_row = padded_width / block_width;

	// RGBA8 takes two cache lines (64 bytes) per block; all other formats take
	// one (32 bytes).
	if (format == GX_TF_RGBA8)
		return blocks_per_row * 64;

	return blocks_per_row * 32;
}
// Returns how many rows of blocks are needed to cover the texture's native
// height for its current format.
u32 TextureCache::TCacheEntryBase::NumBlocksY() const
{
	const u32 block_height = TexDecoder_GetBlockHeightInTexels(format);

	// Round the native height up to a multiple of the block height so a
	// partially filled last row still counts as a full row.
	const u32 padded_height = ROUND_UP(native_height, block_height);

	return padded_height / block_height;
}
void TextureCache : : TCacheEntryBase : : SetEfbCopy ( u32 stride )
{
is_efb_copy = true ;
memory_stride = stride ;
2015-10-19 10:00:29 -06:00
_assert_msg_ ( VIDEO , memory_stride > = BytesPerRow ( ) , " Memory stride is too small " ) ;
2015-09-04 08:45:29 -06:00
size_in_bytes = memory_stride * NumBlocksY ( ) ;
}
2015-09-05 08:03:49 -06:00
// Fill gamecube memory backing this texture with zeros.
void TextureCache : : TCacheEntryBase : : Zero ( u8 * ptr )
{
for ( u32 i = 0 ; i < NumBlocksY ( ) ; i + + )
{
2015-10-19 10:00:29 -06:00
memset ( ptr , 0 , BytesPerRow ( ) ) ;
2015-09-05 08:03:49 -06:00
ptr + = memory_stride ;
}
}
2015-09-13 05:30:56 -06:00
// Computes a hash over the memory that backs this entry (starting at `addr`).
//
// When the rows of blocks are contiguous (memory_stride == BytesPerRow()) the
// whole size_in_bytes range is hashed in one call. Otherwise each row of blocks
// is hashed separately, skipping the gap between rows, and the per-row hashes
// are folded into a single value seeded with size_in_bytes.
//
// NOTE(review): the pointer returned by Memory::GetPointer is used unchecked;
// presumably callers guarantee `addr` is valid - confirm.
u64 TextureCache::TCacheEntryBase::CalculateHash() const
{
	u8* ptr = Memory::GetPointer(addr);

	// Loop-invariant: evaluate once instead of once per row.
	const u32 bytes_per_row = BytesPerRow();

	if (memory_stride == bytes_per_row)
	{
		return GetHash64(ptr, size_in_bytes, g_ActiveConfig.iSafeTextureCache_ColorSamples);
	}

	const u32 blocks = NumBlocksY();
	u64 temp_hash = size_in_bytes;

	u32 samples_per_row = 0;
	// `blocks` is zero when the entry has no rows (zero native height); the
	// division below would then be a division by zero. The loop does not run in
	// that case, so samples_per_row is simply left at 0.
	if (g_ActiveConfig.iSafeTextureCache_ColorSamples != 0 && blocks != 0)
	{
		// Hash at least 4 samples per row to avoid hashing in a bad pattern, like just on the left side of the efb copy
		samples_per_row = std::max(g_ActiveConfig.iSafeTextureCache_ColorSamples / blocks, 4u);
	}

	for (u32 i = 0; i < blocks; i++)
	{
		// Multiply by a prime number to mix the hash up a bit. This prevents identical blocks from canceling each other out
		temp_hash = (temp_hash * 397) ^ GetHash64(ptr, bytes_per_row, samples_per_row);
		ptr += memory_stride;
	}

	return temp_hash;
}