Add support for hybrid XFB

This commit is contained in:
iwubcode
2017-05-29 17:02:09 -05:00
parent 84ca9a4aec
commit 79387dddb2
45 changed files with 400 additions and 985 deletions

View File

@ -250,16 +250,18 @@ static void BPWritten(const BPCmd& bp)
float num_xfb_lines = 1.0f + bpmem.copyTexSrcWH.y * yScale;
u32 height = static_cast<u32>(num_xfb_lines);
if (height > MAX_XFB_HEIGHT)
{
INFO_LOG(VIDEO, "Tried to scale EFB to too many XFB lines: %d (%f)", height, num_xfb_lines);
height = MAX_XFB_HEIGHT;
}
DEBUG_LOG(VIDEO, "RenderToXFB: destAddr: %08x | srcRect {%d %d %d %d} | fbWidth: %u | "
"fbStride: %u | fbHeight: %u",
destAddr, srcRect.left, srcRect.top, srcRect.right, srcRect.bottom,
bpmem.copyTexSrcWH.x + 1, destStride, height);
bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
g_texture_cache->CopyRenderTargetToTexture(destAddr, EFBCopyFormat::XFB, destStride,
is_depth_copy, srcRect, false,
false);
// This stays in to signal end of a "frame"
g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]);
}

View File

@ -21,9 +21,6 @@ FramebufferManagerBase::VirtualXFBListType
std::array<const XFBSourceBase*, FramebufferManagerBase::MAX_VIRTUAL_XFB>
FramebufferManagerBase::m_overlappingXFBArray;
unsigned int FramebufferManagerBase::s_last_xfb_width = 1;
unsigned int FramebufferManagerBase::s_last_xfb_height = 1;
unsigned int FramebufferManagerBase::m_EFBLayers = 1;
FramebufferManagerBase::FramebufferManagerBase()
@ -40,85 +37,6 @@ FramebufferManagerBase::~FramebufferManagerBase()
m_realXFBSource.reset();
}
const XFBSourceBase* const* FramebufferManagerBase::GetXFBSource(u32 xfbAddr, u32 fbWidth,
u32 fbHeight, u32* xfbCountP)
{
if (!g_ActiveConfig.bUseXFB)
return nullptr;
if (g_ActiveConfig.bUseRealXFB)
return GetRealXFBSource(xfbAddr, fbWidth, fbHeight, xfbCountP);
else
return GetVirtualXFBSource(xfbAddr, fbWidth, fbHeight, xfbCountP);
}
const XFBSourceBase* const* FramebufferManagerBase::GetRealXFBSource(u32 xfbAddr, u32 fbWidth,
u32 fbHeight, u32* xfbCountP)
{
*xfbCountP = 1;
// recreate if needed
if (m_realXFBSource &&
(m_realXFBSource->texWidth != fbWidth || m_realXFBSource->texHeight != fbHeight))
m_realXFBSource.reset();
if (!m_realXFBSource && g_framebuffer_manager)
m_realXFBSource = g_framebuffer_manager->CreateXFBSource(fbWidth, fbHeight, 1);
if (!m_realXFBSource)
return nullptr;
m_realXFBSource->srcAddr = xfbAddr;
m_realXFBSource->srcWidth = MAX_XFB_WIDTH;
m_realXFBSource->srcHeight = MAX_XFB_HEIGHT;
m_realXFBSource->texWidth = fbWidth;
m_realXFBSource->texHeight = fbHeight;
m_realXFBSource->sourceRc.left = 0;
m_realXFBSource->sourceRc.top = 0;
m_realXFBSource->sourceRc.right = fbWidth;
m_realXFBSource->sourceRc.bottom = fbHeight;
// Decode YUYV data from GameCube RAM
m_realXFBSource->DecodeToTexture(xfbAddr, fbWidth, fbHeight);
m_overlappingXFBArray[0] = m_realXFBSource.get();
return &m_overlappingXFBArray[0];
}
const XFBSourceBase* const*
FramebufferManagerBase::GetVirtualXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32* xfbCountP)
{
u32 xfbCount = 0;
if (m_virtualXFBList.empty()) // no Virtual XFBs available
return nullptr;
u32 srcLower = xfbAddr;
u32 srcUpper = xfbAddr + 2 * fbWidth * fbHeight;
VirtualXFBListType::reverse_iterator it = m_virtualXFBList.rbegin(),
vlend = m_virtualXFBList.rend();
for (; it != vlend; ++it)
{
VirtualXFB* vxfb = &*it;
u32 dstLower = vxfb->xfbAddr;
u32 dstUpper = vxfb->xfbAddr + 2 * vxfb->xfbWidth * vxfb->xfbHeight;
if (AddressRangesOverlap(srcLower, srcUpper, dstLower, dstUpper))
{
m_overlappingXFBArray[xfbCount] = vxfb->xfbSource.get();
++xfbCount;
}
}
*xfbCountP = xfbCount;
return &m_overlappingXFBArray[0];
}
void FramebufferManagerBase::CopyToXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight,
const EFBRectangle& sourceRc, float Gamma)
{
@ -247,19 +165,3 @@ void FramebufferManagerBase::ReplaceVirtualXFB()
}
}
}
int FramebufferManagerBase::ScaleToVirtualXfbWidth(int x, const TargetRectangle& target_rectangle)
{
if (g_ActiveConfig.RealXFBEnabled())
return x;
return x * target_rectangle.GetWidth() / s_last_xfb_width;
}
int FramebufferManagerBase::ScaleToVirtualXfbHeight(int y, const TargetRectangle& target_rectangle)
{
if (g_ActiveConfig.RealXFBEnabled())
return y;
return y * target_rectangle.GetHeight() / s_last_xfb_height;
}

View File

@ -20,7 +20,6 @@ inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
struct XFBSourceBase
{
virtual ~XFBSourceBase() {}
virtual void DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) = 0;
virtual void CopyEFB(float Gamma) = 0;
@ -50,15 +49,6 @@ public:
static void CopyToXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc,
float Gamma);
static const XFBSourceBase* const* GetXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight,
u32* xfbCount);
static void SetLastXfbWidth(unsigned int width) { s_last_xfb_width = width; }
static void SetLastXfbHeight(unsigned int height) { s_last_xfb_height = height; }
static unsigned int LastXfbWidth() { return s_last_xfb_width; }
static unsigned int LastXfbHeight() { return s_last_xfb_height; }
static int ScaleToVirtualXfbWidth(int x, const TargetRectangle& target_rectangle);
static int ScaleToVirtualXfbHeight(int y, const TargetRectangle& target_rectangle);
static unsigned int GetEFBLayers() { return m_EFBLayers; }
virtual std::pair<u32, u32> GetTargetSize() const = 0;
@ -93,18 +83,10 @@ private:
static void CopyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc,
float Gamma = 1.0f);
static const XFBSourceBase* const* GetRealXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight,
u32* xfbCount);
static const XFBSourceBase* const* GetVirtualXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight,
u32* xfbCount);
static std::unique_ptr<XFBSourceBase> m_realXFBSource; // Only used in Real XFB mode
static VirtualXFBListType m_virtualXFBList; // Only used in Virtual XFB mode
static std::array<const XFBSourceBase*, MAX_VIRTUAL_XFB> m_overlappingXFBArray;
static unsigned int s_last_xfb_width;
static unsigned int s_last_xfb_height;
};
extern std::unique_ptr<FramebufferManagerBase> g_framebuffer_manager;

View File

@ -84,9 +84,6 @@ static float AspectToWidescreen(float aspect)
Renderer::Renderer(int backbuffer_width, int backbuffer_height)
: m_backbuffer_width(backbuffer_width), m_backbuffer_height(backbuffer_height)
{
FramebufferManagerBase::SetLastXfbWidth(MAX_XFB_WIDTH);
FramebufferManagerBase::SetLastXfbHeight(MAX_XFB_HEIGHT);
UpdateActiveConfig();
UpdateDrawRectangle();
CalculateTargetSize();
@ -116,19 +113,6 @@ void Renderer::RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStri
return;
m_xfb_written = true;
if (g_ActiveConfig.bUseXFB)
{
FramebufferManagerBase::CopyToXFB(xfbAddr, fbStride, fbHeight, sourceRc, Gamma);
}
else
{
// The timing is not predictable here. So try to use the XFB path to dump frames.
u64 ticks = CoreTiming::GetTicks();
// below div two to convert from bytes to pixels - it expects width, not stride
Swap(xfbAddr, fbStride / 2, fbStride / 2, fbHeight, sourceRc, ticks, Gamma);
}
}
unsigned int Renderer::GetEFBScale() const
@ -433,9 +417,7 @@ TargetRectangle Renderer::CalculateFrameDumpDrawRectangle() const
rc.top = 0;
// If full-resolution frame dumping is disabled, just use the window draw rectangle.
// Also do this if RealXFB is enabled, since the image has been downscaled for the XFB copy
// anyway, and there's no point writing an upscaled frame with no filtering.
if (!g_ActiveConfig.bInternalResolutionFrameDumps || g_ActiveConfig.RealXFBEnabled())
if (!g_ActiveConfig.bInternalResolutionFrameDumps)
{
// But still remove the borders, since the caller expects this.
rc.right = m_target_rectangle.GetWidth();
@ -663,8 +645,19 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
m_aspect_wide = flush_count_anamorphic > 0.75 * flush_total;
}
// TODO: merge more generic parts into VideoCommon
SwapImpl(xfbAddr, fbWidth, fbStride, fbHeight, rc, ticks, Gamma);
if (xfbAddr && fbWidth && fbStride && fbHeight)
{
constexpr int force_safe_texture_cache_hash = 0;
// Get the current XFB from texture cache
auto* xfb_entry = g_texture_cache->GetTexture(xfbAddr, fbWidth, fbHeight, TextureFormat::XFB,
force_safe_texture_cache_hash);
// TODO, check if xfb_entry is a duplicate of the previous frame and skip SwapImpl
// TODO: merge more generic parts into VideoCommon
g_renderer->SwapImpl(xfb_entry->texture.get(), rc, ticks, Gamma);
}
if (m_xfb_written)
m_fps_counter.Update();

View File

@ -32,6 +32,7 @@
#include "VideoCommon/RenderState.h"
#include "VideoCommon/VideoCommon.h"
class AbstractTexture;
class PostProcessingShaderImplementation;
enum class EFBAccessType;
@ -132,8 +133,7 @@ public:
// Finish up the current frame, print some stats
void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, u64 ticks,
float Gamma = 1.0f);
virtual void SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight,
const EFBRectangle& rc, u64 ticks, float Gamma = 1.0f) = 0;
virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma = 1.0f) = 0;
PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; }
void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; }

View File

@ -238,7 +238,8 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma
if (!decoded_entry)
return nullptr;
decoded_entry->SetGeneralParameters(entry->addr, entry->size_in_bytes, entry->format);
decoded_entry->SetGeneralParameters(entry->addr, entry->size_in_bytes, entry->format,
entry->should_force_safe_hashing);
decoded_entry->SetDimensions(entry->native_width, entry->native_height, 1);
decoded_entry->SetHashes(entry->base_hash, entry->hash);
decoded_entry->frameCount = FRAMECOUNT_INVALID;
@ -462,20 +463,6 @@ static u32 CalculateLevelSize(u32 level_0_size, u32 level)
return std::max(level_0_size >> level, 1u);
}
// Used by TextureCacheBase::Load
TextureCacheBase::TCacheEntry* TextureCacheBase::ReturnEntry(unsigned int stage, TCacheEntry* entry)
{
entry->frameCount = FRAMECOUNT_INVALID;
bound_textures[stage] = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true);
// We need to keep track of invalided textures until they have actually been replaced or re-loaded
valid_bind_points.set(stage);
return entry;
}
void TextureCacheBase::BindTextures()
{
for (size_t i = 0; i < bound_textures.size(); ++i)
@ -625,7 +612,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// if this stage was not invalidated by changes to texture registers, keep the current texture
if (IsValidBindPoint(stage) && bound_textures[stage])
{
return ReturnEntry(stage, bound_textures[stage]);
return bound_textures[stage];
}
const FourTexUnits& tex = bpmem.tex[stage >> 2];
@ -639,7 +626,35 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
const bool use_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0[id]);
u32 tex_levels = use_mipmaps ? ((tex.texMode1[id].max_lod + 0xf) / 0x10 + 1) : 1;
const bool from_tmem = tex.texImage1[id].image_type != 0;
const u32 tmem_address_even = from_tmem ? tex.texImage1[id].tmem_even * TMEM_LINE_SIZE : 0;
const u32 tmem_address_odd = from_tmem ? tex.texImage2[id].tmem_odd * TMEM_LINE_SIZE : 0;
auto entry = GetTexture(address, width, height, texformat,
g_ActiveConfig.iSafeTextureCache_ColorSamples, tlutaddr, tlutfmt,
use_mipmaps, tex_levels, from_tmem, tmem_address_even,
tmem_address_odd);
if (!entry)
return nullptr;
entry->frameCount = FRAMECOUNT_INVALID;
bound_textures[stage] = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true);
// We need to keep track of invalided textures until they have actually been replaced or re-loaded
valid_bind_points.set(stage);
return entry;
}
TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 width, u32 height,
const TextureFormat texformat,
const int textureCacheSafetyColorSampleSize, u32 tlutaddr,
TLUTFormat tlutfmt, bool use_mipmaps,
u32 tex_levels, bool from_tmem, u32 tmem_address_even,
u32 tmem_address_odd)
{
// TexelSizeInNibbles(format) * width * height / 16;
const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat);
const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat);
@ -683,9 +698,12 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
}
// TODO: the texture cache lookup is based on address, but a texture from tmem has no reason
// to have a unique and valid address. This could result in a regular texture and a tmem
// texture aliasing onto the same texture cache entry.
const u8* src_data;
if (from_tmem)
src_data = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE];
src_data = &texMem[tmem_address_even];
else
src_data = Memory::GetPointer(address);
@ -704,13 +722,13 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data
// from the low tmem bank than it should)
base_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
base_hash = GetHash64(src_data, texture_size, textureCacheSafetyColorSampleSize);
u32 palette_size = 0;
if (isPaletteTexture)
{
palette_size = TexDecoder_GetPaletteSize(texformat);
full_hash = base_hash ^ GetHash64(&texMem[tlutaddr], palette_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples);
textureCacheSafetyColorSampleSize);
}
else
{
@ -789,7 +807,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// texture formats. I'm not sure what effect checking width/height/levels
// would have.
if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion)
return ReturnEntry(stage, entry);
return entry;
// Note that we found an unconverted EFB copy, then continue. We'll
// perform the conversion later. Currently, we only convert EFB copies to
@ -816,7 +834,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
{
entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
return ReturnEntry(stage, entry);
return entry;
}
}
@ -841,7 +859,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
if (decoded_entry)
{
return ReturnEntry(stage, decoded_entry);
return decoded_entry;
}
}
@ -851,9 +869,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// textures cause unnecessary slowdowns
// Example: Tales of Symphonia (GC) uses over 500 small textures in menus, but only around 70
// different ones
if (g_ActiveConfig.iSafeTextureCache_ColorSamples == 0 ||
if (textureCacheSafetyColorSampleSize == 0 ||
std::max(texture_size, palette_size) <=
(u32)g_ActiveConfig.iSafeTextureCache_ColorSamples * 8)
(u32)textureCacheSafetyColorSampleSize * 8)
{
auto hash_range = textures_by_hash.equal_range(full_hash);
TexHashCache::iterator hash_iter = hash_range.first;
@ -866,7 +884,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
{
entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt);
return ReturnEntry(stage, entry);
return entry;
}
++hash_iter;
}
@ -936,64 +954,66 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// Initialized to null because only software loading uses this buffer
u8* dst_buffer = nullptr;
if (!hires_tex && decode_on_gpu)
if (!hires_tex)
{
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height,
expandedWidth, expandedHeight, row_stride, tlut, tlutfmt);
}
else if (!hires_tex)
{
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
// Allocate memory for all levels at once
size_t total_texture_size = decoded_texture_size;
// For the downsample, we need 2 buffers; 1 is 1/4 of the original texture, the other 1/16
size_t mip_downsample_buffer_size = decoded_texture_size * 5 / 16;
size_t prev_level_size = decoded_texture_size;
for (u32 i = 1; i < tex_levels; ++i)
if (decode_on_gpu)
{
prev_level_size /= 4;
total_texture_size += prev_level_size;
}
// Add space for the downsampling at the end
total_texture_size += mip_downsample_buffer_size;
CheckTempSize(total_texture_size);
dst_buffer = temp;
if (!(texformat == TextureFormat::RGBA8 && from_tmem))
{
TexDecoder_Decode(dst_buffer, src_data, expandedWidth, expandedHeight, texformat, tlut,
tlutfmt);
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(
entry, 0, src_data, texture_size, texformat, width, height,
expandedWidth, expandedHeight, row_stride, tlut, tlutfmt);
}
else
{
u8* src_data_gb =
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
TexDecoder_DecodeRGBA8FromTmem(dst_buffer, src_data, src_data_gb, expandedWidth,
expandedHeight);
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
// Allocate memory for all levels at once
size_t total_texture_size = decoded_texture_size;
// For the downsample, we need 2 buffers; 1 is 1/4 of the original texture, the other 1/16
size_t mip_downsample_buffer_size = decoded_texture_size * 5 / 16;
size_t prev_level_size = decoded_texture_size;
for (u32 i = 1; i < tex_levels; ++i)
{
prev_level_size /= 4;
total_texture_size += prev_level_size;
}
// Add space for the downsampling at the end
total_texture_size += mip_downsample_buffer_size;
CheckTempSize(total_texture_size);
dst_buffer = temp;
if (!(texformat == TextureFormat::RGBA8 && from_tmem))
{
TexDecoder_Decode(dst_buffer, src_data, expandedWidth, expandedHeight, texformat, tlut,
tlutfmt);
}
else
{
u8* src_data_gb =
&texMem[tmem_address_odd];
TexDecoder_DecodeRGBA8FromTmem(dst_buffer, src_data, src_data_gb, expandedWidth, expandedHeight);
}
entry->texture->Load(0, width, height, expandedWidth, dst_buffer, decoded_texture_size);
arbitrary_mip_detector.AddLevel(width, height, expandedWidth, dst_buffer);
dst_buffer += decoded_texture_size;
}
entry->texture->Load(0, width, height, expandedWidth, dst_buffer, decoded_texture_size);
arbitrary_mip_detector.AddLevel(width, height, expandedWidth, dst_buffer);
dst_buffer += decoded_texture_size;
}
iter = textures_by_address.emplace(address, entry);
if (g_ActiveConfig.iSafeTextureCache_ColorSamples == 0 ||
if (textureCacheSafetyColorSampleSize == 0 ||
std::max(texture_size, palette_size) <=
(u32)g_ActiveConfig.iSafeTextureCache_ColorSamples * 8)
(u32)textureCacheSafetyColorSampleSize * 8)
{
entry->textures_by_hash_iter = textures_by_hash.emplace(full_hash, entry);
}
entry->SetGeneralParameters(address, texture_size, full_format);
entry->SetGeneralParameters(address, texture_size, full_format, false);
entry->SetDimensions(nativeW, nativeH, tex_levels);
entry->SetHashes(base_hash, full_hash);
entry->is_efb_copy = false;
@ -1025,9 +1045,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
const u8* ptr_odd = nullptr;
if (from_tmem)
{
ptr_even = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE +
texture_size];
ptr_odd = &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
ptr_even = &texMem[tmem_address_even + texture_size];
ptr_odd = &texMem[tmem_address_odd];
}
for (u32 level = 1; level != texLevels; ++level)
@ -1081,7 +1100,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
return ReturnEntry(stage, entry);
return entry;
}
void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat,
@ -1159,6 +1178,10 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
unsigned int cbufid = UINT_MAX;
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
bool efbHasAlpha = srcFormat == PEControl::RGBA6_Z24;
bool copy_to_ram = !g_ActiveConfig.bSkipEFBCopyToRam;
bool copy_to_vram = g_ActiveConfig.backend_info.bSupportsCopyToVram;
bool is_xfb_copy = false;
if (is_depth_copy)
{
@ -1388,6 +1411,15 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
}
break;
case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
ColorMask[3] = 0.0f;
fConstAdd[3] = 1.0f;
cbufid = 30; // just re-use the RGBX8 cbufid from above
copy_to_ram = g_ActiveConfig.bUseRealXFB;
is_xfb_copy = true;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(dstFormat));
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
@ -1430,9 +1462,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
const u32 bytes_per_row = num_blocks_x * bytes_per_block;
const u32 covered_range = num_blocks_y * dstStride;
bool copy_to_ram = !g_ActiveConfig.bSkipEFBCopyToRam;
bool copy_to_vram = true;
if (copy_to_ram)
{
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
@ -1524,7 +1553,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
if (entry)
{
entry->SetGeneralParameters(dstAddr, 0, baseFormat);
entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy);
entry->SetDimensions(tex_w, tex_h, 1);
entry->frameCount = FRAMECOUNT_INVALID;
@ -1537,7 +1566,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);
if (g_ActiveConfig.bDumpEFBTarget)
if (g_ActiveConfig.bDumpEFBTarget && !is_xfb_copy)
{
static int count = 0;
entry->texture->Save(StringFromFormat("%sefb_frame_%i.png",
@ -1699,12 +1728,22 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride)
size_in_bytes = memory_stride * NumBlocksY();
}
int TextureCacheBase::TCacheEntry::HashSampleSize() const
{
if (should_force_safe_hashing)
{
return 0;
}
return g_ActiveConfig.iSafeTextureCache_ColorSamples;
}
u64 TextureCacheBase::TCacheEntry::CalculateHash() const
{
u8* ptr = Memory::GetPointer(addr);
if (memory_stride == BytesPerRow())
{
return GetHash64(ptr, size_in_bytes, g_ActiveConfig.iSafeTextureCache_ColorSamples);
return GetHash64(ptr, size_in_bytes, HashSampleSize());
}
else
{
@ -1712,11 +1751,11 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const
u64 temp_hash = size_in_bytes;
u32 samples_per_row = 0;
if (g_ActiveConfig.iSafeTextureCache_ColorSamples != 0)
if (HashSampleSize() != 0)
{
// Hash at least 4 samples per row to avoid hashing in a bad pattern, like just on the left
// side of the efb copy
samples_per_row = std::max(g_ActiveConfig.iSafeTextureCache_ColorSamples / blocks, 4u);
samples_per_row = std::max(HashSampleSize() / blocks, 4u);
}
for (u32 i = 0; i < blocks; i++)

View File

@ -84,6 +84,7 @@ public:
bool tmem_only = false; // indicates that this texture only exists in the tmem cache
bool has_arbitrary_mips = false; // indicates that the mips in this texture are arbitrary
// content, aren't just downscaled
bool should_force_safe_hashing = false; // for XFB
unsigned int native_width,
native_height; // Texture dimensions from the GameCube's point of view
@ -105,11 +106,12 @@ public:
~TCacheEntry();
void SetGeneralParameters(u32 _addr, u32 _size, TextureAndTLUTFormat _format)
void SetGeneralParameters(u32 _addr, u32 _size, TextureAndTLUTFormat _format, bool force_safe_hashing)
{
addr = _addr;
size_in_bytes = _size;
format = _format;
should_force_safe_hashing = force_safe_hashing;
}
void SetDimensions(unsigned int _native_width, unsigned int _native_height,
@ -145,6 +147,7 @@ public:
u64 CalculateHash() const;
int HashSampleSize() const;
u32 GetWidth() const { return texture->GetConfig().width; }
u32 GetHeight() const { return texture->GetConfig().height; }
u32 GetNumLevels() const { return texture->GetConfig().levels; }
@ -172,7 +175,12 @@ public:
TCacheEntry* Load(const u32 stage);
static void InvalidateAllBindPoints() { valid_bind_points.reset(); }
static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); }
void BindTextures();
TCacheEntry* GetTexture(u32 address, u32 width, u32 height, const TextureFormat texformat,
const int textureCacheSafetyColorSampleSize, u32 tlutaddr = 0,
TLUTFormat tlutfmt = TLUTFormat::IA8, bool use_mipmaps = false,
u32 tex_levels = 1, bool from_tmem = false, u32 tmem_address_even = 0,
u32 tmem_address_odd = 0);
virtual void BindTextures();
void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 dstStride,
bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf);
@ -248,8 +256,6 @@ private:
// Removes and unlinks texture from texture cache and returns it to the pool
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter);
TCacheEntry* ReturnEntry(unsigned int stage, TCacheEntry* entry);
TexAddrCache textures_by_address;
TexHashCache textures_by_hash;
TexPool texture_pool;

View File

@ -49,6 +49,8 @@ u16 GetEncodedSampleCount(EFBCopyFormat format)
case EFBCopyFormat::RG8:
case EFBCopyFormat::GB8:
return 2;
case EFBCopyFormat::XFB:
return 2;
default:
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEncodedSampleCount)", static_cast<int>(format));
return 1;
@ -656,6 +658,28 @@ static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& para
WriteEncoderEnd(p);
}
static void WriteXFBEncoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, EFBCopyFormat::XFB, ApiType);
WRITE(p, " float3 y_const = float3(0.257, 0.504, 0.098);\n");
WRITE(p, " float3 u_const = float3(-0.148, -0.291, 0.439);\n");
WRITE(p, " float3 v_const = float3(0.439, -0.368, -0.071);\n");
WRITE(p, " float3 color0;\n");
WRITE(p, " float3 color1;\n");
WriteSampleColor(p, "rgb", "color0", 0, ApiType, params);
WriteSampleColor(p, "rgb", "color1", 1, ApiType, params);
WRITE(p, " float3 average = (color0 + color1) * 0.5;\n");
WRITE(p, " ocol0.b = dot(color0, y_const) + 0.0625;\n");
WRITE(p, " ocol0.g = dot(average, u_const) + 0.5;\n");
WRITE(p, " ocol0.r = dot(color1, y_const) + 0.0625;\n");
WRITE(p, " ocol0.a = dot(average, v_const) + 0.5;\n");
WriteEncoderEnd(p);
}
const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type)
{
text[sizeof(text) - 1] = 0x7C; // canary
@ -728,6 +752,9 @@ const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type
else
WriteCC8Encoder(p, "gb", api_type, params);
break;
case EFBCopyFormat::XFB:
WriteXFBEncoder(p, api_type, params);
break;
default:
PanicAlert("Invalid EFB Copy Format (0x%X)! (GenerateEncodingShader)",
static_cast<int>(params.copy_format));

View File

@ -28,6 +28,11 @@ enum class TextureFormat
C8 = 0x9,
C14X2 = 0xA,
CMPR = 0xE,
// Special texture format used to represent YUVY xfb copies.
// They aren't really textures, but they share so much hardware and usecases that it makes sense
// to emulate them as part of texture cache.
XFB = 0xF,
};
static inline bool IsColorIndexed(TextureFormat format)
@ -73,6 +78,11 @@ enum class EFBCopyFormat
B8 = 0xA, // B8, Z8L
RG8 = 0xB, // RG8, Z16R (Note: G and R are reversed)
GB8 = 0xC, // GB8, Z16L
// Special texture format used to represent YUVY xfb copies.
// They aren't really textures, but they share so much hardware and usecases that it makes sense
// to emulate them as part of texture cache.
XFB = 0xF,
};
enum class TLUTFormat

View File

@ -3,9 +3,11 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <cmath>
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"
@ -46,6 +48,9 @@ int TexDecoder_GetTexelSizeInNibbles(TextureFormat format)
// Compressed format
case TextureFormat::CMPR:
return 1;
// Special formats
case TextureFormat::XFB:
return 4;
default:
PanicAlert("Invalid Texture Format (0x%X)! (GetTexelSizeInNibbles)", static_cast<int>(format));
return 1;
@ -82,6 +87,9 @@ int TexDecoder_GetBlockWidthInTexels(TextureFormat format)
// Compressed format
case TextureFormat::CMPR:
return 8;
// Special formats
case TextureFormat::XFB:
return 16;
default:
PanicAlert("Invalid Texture Format (0x%X)! (GetBlockWidthInTexels)", static_cast<int>(format));
return 8;
@ -113,6 +121,9 @@ int TexDecoder_GetBlockHeightInTexels(TextureFormat format)
// Compressed format
case TextureFormat::CMPR:
return 8;
// Special formats
case TextureFormat::XFB:
return 1;
default:
PanicAlert("Invalid Texture Format (0x%X)! (GetBlockHeightInTexels)", static_cast<int>(format));
return 4;
@ -144,6 +155,9 @@ int TexDecoder_GetEFBCopyBlockWidthInTexels(EFBCopyFormat format)
// 32-bit formats
case EFBCopyFormat::RGBA8:
return 4;
// Special formats
case EFBCopyFormat::XFB:
return 16;
default:
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBlockWidthInTexels)",
static_cast<int>(format));
@ -176,6 +190,9 @@ int TexDecoder_GetEFBCopyBlockHeightInTexels(EFBCopyFormat format)
// 32-bit formats
case EFBCopyFormat::RGBA8:
return 4;
// Special formats
case EFBCopyFormat::XFB:
return 1;
default:
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBlockHeightInTexels)",
static_cast<int>(format));
@ -226,6 +243,8 @@ TextureFormat TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat format)
return TextureFormat::RGB5A3;
case EFBCopyFormat::RGBA8:
return TextureFormat::RGBA8;
case EFBCopyFormat::XFB:
return TextureFormat::XFB;
default:
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBaseFormat)", static_cast<int>(format));
return static_cast<TextureFormat>(format);
@ -247,7 +266,7 @@ static const char* texfmt[] = {
"0x1C", "0x1D", "0x1E", "0x1F",
// pixel + copy
"CR4", "0x21", "CRA4", "CRA8", "0x24", "0x25", "CYUVA8", "CA8", "CR8", "CG8", "CB8", "CRG8",
"CGB8", "0x2D", "0x2E", "0x2F",
"CGB8", "0x2D", "0x2E", "XFB",
// Z + copy
"CZ4", "0x31", "0x32", "0x33", "0x34", "0x35", "0x36", "0x37", "0x38", "CZ8M", "CZ8L", "0x3B",
"CZ16L", "0x3D", "0x3E", "0x3F",
@ -619,6 +638,24 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = color;
}
break;
case TextureFormat::XFB:
{
// TODO: I should kind of like... ACTUALLY TEST THIS!!!!!
size_t offset = (t * imageWidth + (s & (~1))) * 2;
// We do this one color sample (aka 2 RGB pixles) at a time
int Y = int((s & 1) == 0 ? src[offset] : src[offset + 2]) - 16;
int U = int(src[offset + 1]) - 128;
int V = int(src[offset + 3]) - 128;
// We do the inverse BT.601 conversion for YCbCr to RGB
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
u8 R = MathUtil::Clamp(int(1.164f * Y + 1.596f * V), 0, 255);
u8 G = MathUtil::Clamp(int(1.164f * Y - 0.392f * U - 0.813f * V), 0, 255);
u8 B = MathUtil::Clamp(int(1.164f * Y + 2.017f * U), 0, 255);
dst[t * imageWidth + s] = 0xff000000 | B << 16 | G << 8 | R;
}
break;
}
}

View File

@ -9,6 +9,7 @@
#include "Common/CPUDetect.h"
#include "Common/CommonTypes.h"
#include "Common/Intrinsics.h"
#include "Common/MathUtil.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"
@ -1485,6 +1486,37 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, Text
case TextureFormat::CMPR:
TexDecoder_DecodeImpl_CMPR(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break;
case TextureFormat::XFB:
{
for (int y = 0; y < height; y += 1)
{
for (int x = 0; x < width; x += 2)
{
size_t offset = static_cast<size_t>((y * width + x) * 2);
// We do this one color sample (aka 2 RGB pixles) at a time
int Y1 = int(src[offset]) - 16;
int U = int(src[offset + 1]) - 128;
int Y2 = int(src[offset + 2]) - 16;
int V = int(src[offset + 3]) - 128;
// We do the inverse BT.601 conversion for YCbCr to RGB
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
u8 R1 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y1 + 1.596f * V), 0, 255));
u8 G1 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y1 - 0.392f * U - 0.813f * V), 0, 255));
u8 B1 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y1 + 2.017f * U), 0, 255));
u8 R2 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y2 + 1.596f * V), 0, 255));
u8 G2 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y2 - 0.392f * U - 0.813f * V), 0, 255));
u8 B2 = static_cast<u8>(MathUtil::Clamp(int(1.164f * Y2 + 2.017f * U), 0, 255));
dst[y * width + x] = 0xff000000 | B1 << 16 | G1 << 8 | R1;
dst[y * width + x + 1] = 0xff000000 | B2 << 16 | G2 << 8 | R2;
}
}
}
break;
default:
PanicAlert("Invalid Texture Format (0x%X)! (_TexDecoder_DecodeImpl)",

View File

@ -55,6 +55,8 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
return (RECT*)this;
}
#endif
TargetRectangle(const MathUtil::Rectangle<int> &other) : MathUtil::Rectangle<int>(other) {}
TargetRectangle() = default;
};
#ifdef _WIN32

View File

@ -221,6 +221,7 @@ struct VideoConfig final
bool bSupportsInternalResolutionFrameDumps;
bool bSupportsGPUTextureDecoding;
bool bSupportsST3CTextures;
bool bSupportsCopyToVram;
bool bSupportsBitfield; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsBPTCTextures;