Move most backend functionality to VideoCommon

This commit is contained in:
Stenzek
2019-02-15 11:59:50 +10:00
parent 933f3ba008
commit f039149198
182 changed files with 8334 additions and 15917 deletions

View File

@ -28,17 +28,21 @@
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/FramebufferManagerBase.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/HiresTextures.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/ShaderCache.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureConverterShaderGen.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
@ -51,8 +55,9 @@ std::unique_ptr<TextureCacheBase> g_texture_cache;
std::bitset<8> TextureCacheBase::valid_bind_points;
TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr<AbstractTexture> tex)
: texture(std::move(tex))
TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr<AbstractTexture> tex,
std::unique_ptr<AbstractFramebuffer> fb)
: texture(std::move(tex)), framebuffer(std::move(fb))
{
}
@ -89,6 +94,25 @@ TextureCacheBase::TextureCacheBase()
InvalidateAllBindPoints();
}
TextureCacheBase::~TextureCacheBase()
{
HiresTexture::Shutdown();
Invalidate();
Common::FreeAlignedMemory(temp);
temp = nullptr;
}
bool TextureCacheBase::Initialize()
{
if (!CreateUtilityTextures())
{
PanicAlert("Failed to create utility textures.");
return false;
}
return true;
}
void TextureCacheBase::Invalidate()
{
FlushEFBCopies();
@ -108,14 +132,6 @@ void TextureCacheBase::Invalidate()
texture_pool.clear();
}
TextureCacheBase::~TextureCacheBase()
{
HiresTexture::Shutdown();
Invalidate();
Common::FreeAlignedMemory(temp);
temp = nullptr;
}
void TextureCacheBase::OnConfigChanged(VideoConfig& config)
{
if (config.bHiresTextures != backup_config.hires_textures ||
@ -139,14 +155,6 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config)
g_ActiveConfig.bTexFmtOverlayCenter);
}
if ((config.stereo_mode != StereoMode::Off) != backup_config.stereo_3d ||
config.bStereoEFBMonoDepth != backup_config.efb_mono_depth)
{
g_texture_cache->DeleteShaders();
if (!g_texture_cache->CompileShaders())
PanicAlert("Failed to recompile one or more texture conversion shaders.");
}
SetBackupConfig(config);
}
@ -243,7 +251,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma
{
TextureConfig new_config = entry->texture->GetConfig();
new_config.levels = 1;
new_config.rendertarget = true;
new_config.flags |= AbstractTextureFlag_RenderTarget;
TCacheEntry* decoded_entry = AllocateCacheEntry(new_config);
if (!decoded_entry)
@ -279,29 +287,27 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e
return;
}
TextureConfig newconfig;
newconfig.width = new_width;
newconfig.height = new_height;
newconfig.layers = entry->GetNumLayers();
newconfig.rendertarget = true;
std::unique_ptr<AbstractTexture> new_texture = AllocateTexture(newconfig);
if (new_texture)
const TextureConfig newconfig(new_width, new_height, 1, entry->GetNumLayers(), 1,
AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget);
std::optional<TexPoolEntry> new_texture = AllocateTexture(newconfig);
if (!new_texture)
{
new_texture->ScaleRectangleFromTexture(entry->texture.get(),
entry->texture->GetConfig().GetRect(),
new_texture->GetConfig().GetRect());
entry->texture.swap(new_texture);
ERROR_LOG(VIDEO, "Scaling failed due to texture allocation failure");
return;
}
auto config = new_texture->GetConfig();
// At this point new_texture has the old texture in it,
// we can potentially reuse this, so let's move it back to the pool
texture_pool.emplace(config, TexPoolEntry(std::move(new_texture)));
}
else
{
ERROR_LOG(VIDEO, "Scaling failed");
}
// No need to convert the coordinates here since they'll be the same.
g_renderer->ScaleTexture(new_texture->framebuffer.get(),
new_texture->texture->GetConfig().GetRect(), entry->texture.get(),
entry->texture->GetConfig().GetRect());
entry->texture.swap(new_texture->texture);
entry->framebuffer.swap(new_texture->framebuffer);
// At this point new_texture has the old texture in it,
// we can potentially reuse this, so let's move it back to the pool
auto config = new_texture->texture->GetConfig();
texture_pool.emplace(
config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer)));
}
TextureCacheBase::TCacheEntry*
@ -747,8 +753,6 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
entry->frameCount = FRAMECOUNT_INVALID;
bound_textures[stage] = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true);
// We need to keep track of invalided textures until they have actually been replaced or
// re-loaded
valid_bind_points.set(stage);
@ -1036,25 +1040,17 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
// banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(texformat, tlutfmt) &&
!(from_tmem && texformat == TextureFormat::RGBA8);
const bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
!(from_tmem && texformat == TextureFormat::RGBA8);
// create the entry/texture
TextureConfig config;
config.width = width;
config.height = height;
config.levels = texLevels;
config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8;
ArbitraryMipmapDetector arbitrary_mip_detector;
const TextureConfig config(width, height, texLevels, 1, 1,
hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8, 0);
TCacheEntry* entry = AllocateCacheEntry(config);
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
if (!entry)
return nullptr;
ArbitraryMipmapDetector arbitrary_mip_detector;
const u8* tlut = &texMem[tlutaddr];
if (hires_tex)
{
@ -1068,14 +1064,10 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
if (!hires_tex)
{
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width,
height, expandedWidth, expandedHeight, row_stride, tlut,
tlutfmt);
}
else
if (!decode_on_gpu ||
!DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height,
expandedWidth, expandedHeight, bytes_per_block * (expandedWidth / bsw),
tlut, tlutfmt))
{
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
@ -1168,20 +1160,16 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh);
const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data;
size_t mip_size =
const u32 mip_size =
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expanded_mip_width / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat,
mip_width, mip_height, expanded_mip_width,
expanded_mip_height, row_stride, tlut, tlutfmt);
}
else
if (!decode_on_gpu ||
!DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat, mip_width,
mip_height, expanded_mip_width, expanded_mip_height,
bytes_per_block * (expanded_mip_width / bsw), tlut, tlutfmt))
{
// No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning
size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
const u32 decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
TexDecoder_Decode(dst_buffer, mip_src_data, expanded_mip_width, expanded_mip_height,
texformat, tlut, tlutfmt);
entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, dst_buffer,
@ -1212,6 +1200,8 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
// This should only be needed if the texture was updated, or used GPU decoding.
entry->texture->FinishedRendering();
return entry;
}
@ -1379,7 +1369,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati
// or as a container for overlapping textures, never need to be combined
// with other textures
TCacheEntry* stitched_entry =
CreateNormalTexture(tex_info, FramebufferManagerBase::GetEFBLayers());
CreateNormalTexture(tex_info, g_framebuffer_manager->GetEFBLayers());
stitched_entry->may_have_overlapping_textures = false;
// It is possible that some of the overlapping textures overlap each other.
@ -1540,6 +1530,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati
return nullptr;
}
stitched_entry->texture->FinishedRendering();
return stitched_entry;
}
@ -1547,17 +1538,10 @@ TextureCacheBase::TCacheEntry*
TextureCacheBase::CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers)
{
// create the entry/texture
TextureConfig config;
config.width = tex_info.native_width;
config.height = tex_info.native_height;
config.levels = tex_info.computed_levels;
config.format = AbstractTextureFormat::RGBA8;
config.rendertarget = true;
config.layers = layers;
const TextureConfig config(tex_info.native_width, tex_info.native_height,
tex_info.computed_levels, layers, 1, AbstractTextureFormat::RGBA8,
AbstractTextureFlag_RenderTarget);
TCacheEntry* entry = AllocateCacheEntry(config);
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
if (!entry)
return nullptr;
@ -1590,15 +1574,15 @@ TextureCacheBase::GetTextureFromMemory(const TextureLookupInformation& tex_info)
// banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
bool decode_on_gpu = g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(tex_info.full_format.texfmt,
tex_info.full_format.tlutfmt) &&
!(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8);
const bool decode_on_gpu =
g_ActiveConfig.UseGPUTextureDecoding() &&
!(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8);
// Since it's coming from RAM, it can only have one layer (no stereo).
TCacheEntry* entry = CreateNormalTexture(tex_info, 1);
entry->may_have_overlapping_textures = false;
LoadTextureLevelZeroFromMemory(entry, tex_info, decode_on_gpu);
entry->texture->FinishedRendering();
return entry;
}
@ -1608,15 +1592,13 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
{
const u8* tlut = &texMem[tex_info.tlut_address];
if (decode_on_gpu)
{
u32 row_stride = tex_info.bytes_per_block * (tex_info.expanded_width / tex_info.block_width);
g_texture_cache->DecodeTextureOnGPU(
entry_to_update, 0, tex_info.src_data, tex_info.total_bytes, tex_info.full_format.texfmt,
tex_info.native_width, tex_info.native_height, tex_info.expanded_width,
tex_info.expanded_height, row_stride, tlut, tex_info.full_format.tlutfmt);
}
else
if (!decode_on_gpu ||
DecodeTextureOnGPU(entry_to_update, 0, tex_info.src_data, tex_info.total_bytes,
tex_info.full_format.texfmt, tex_info.native_width, tex_info.native_height,
tex_info.expanded_width, tex_info.expanded_height,
tex_info.bytes_per_block *
(tex_info.expanded_width / tex_info.block_width),
tlut, tex_info.full_format.tlutfmt))
{
size_t decoded_texture_size = tex_info.expanded_width * sizeof(u32) * tex_info.expanded_height;
CheckTempSize(decoded_texture_size);
@ -1637,12 +1619,12 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
}
}
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients(
const CopyFilterCoefficients::Values& coefficients) const
EFBCopyFilterCoefficients
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return {{
return EFBCopyFilterCoefficients{
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
@ -1650,31 +1632,31 @@ TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterC
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f,
}};
};
}
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
const CopyFilterCoefficients::Values& coefficients) const
EFBCopyFilterCoefficients
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients);
EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
if (!g_ActiveConfig.bDisableCopyFilter)
return res;
// Disabling the copy filter in options should not ignore the values the game sets completely,
// as some games use the filter coefficients to control the brightness of the screen. Instead,
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
res[1] += res[0] + res[2];
res[0] = 0;
res[2] = 0;
res.middle = res.upper + res.middle + res.lower;
res.upper = 0.0f;
res.lower = 0.0f;
return res;
}
bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const
bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
{
// If the top/bottom coefficients are zero, no point sampling/blending from these rows.
return coefficients[0] != 0 || coefficients[2] != 0;
return coefficients.upper != 0 || coefficients.lower != 0;
}
void TextureCacheBase::CopyRenderTargetToTexture(
@ -1816,12 +1798,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_vram)
{
// create the texture
TextureConfig config;
config.rendertarget = true;
config.width = scaled_tex_w;
config.height = scaled_tex_h;
config.layers = FramebufferManagerBase::GetEFBLayers();
const TextureConfig config(scaled_tex_w, scaled_tex_h, 1, g_framebuffer_manager->GetEFBLayers(),
1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget);
entry = AllocateCacheEntry(config);
if (entry)
{
@ -1866,7 +1844,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_ram)
{
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
@ -2006,11 +1984,6 @@ void TextureCacheBase::FlushEFBCopies()
m_pending_efb_copies.clear();
}
TextureConfig TextureCacheBase::GetEncodingTextureConfig()
{
return TextureConfig(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, true);
}
void TextureCacheBase::WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride,
std::unique_ptr<AbstractStagingTexture> staging_texture)
{
@ -2069,8 +2042,8 @@ std::unique_ptr<AbstractStagingTexture> TextureCacheBase::GetEFBCopyStagingTextu
return ptr;
}
std::unique_ptr<AbstractStagingTexture> tex =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, GetEncodingTextureConfig());
std::unique_ptr<AbstractStagingTexture> tex = g_renderer->CreateStagingTexture(
StagingTextureType::Readback, m_efb_encoding_texture->GetConfig());
if (!tex)
WARN_LOG(VIDEO, "Failed to create EFB copy staging texture");
@ -2127,37 +2100,50 @@ void TextureCacheBase::UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_
TextureCacheBase::TCacheEntry* TextureCacheBase::AllocateCacheEntry(const TextureConfig& config)
{
std::unique_ptr<AbstractTexture> texture = AllocateTexture(config);
if (!texture)
{
std::optional<TexPoolEntry> alloc = AllocateTexture(config);
if (!alloc)
return nullptr;
}
TCacheEntry* cacheEntry = new TCacheEntry(std::move(texture));
TCacheEntry* cacheEntry =
new TCacheEntry(std::move(alloc->texture), std::move(alloc->framebuffer));
cacheEntry->textures_by_hash_iter = textures_by_hash.end();
cacheEntry->id = last_entry_id++;
return cacheEntry;
}
std::unique_ptr<AbstractTexture> TextureCacheBase::AllocateTexture(const TextureConfig& config)
std::optional<TextureCacheBase::TexPoolEntry>
TextureCacheBase::AllocateTexture(const TextureConfig& config)
{
TexPool::iterator iter = FindMatchingTextureFromPool(config);
std::unique_ptr<AbstractTexture> entry;
if (iter != texture_pool.end())
{
entry = std::move(iter->second.texture);
auto entry = std::move(iter->second);
texture_pool.erase(iter);
return std::move(entry);
}
else
std::unique_ptr<AbstractTexture> texture = g_renderer->CreateTexture(config);
if (!texture)
{
entry = g_renderer->CreateTexture(config);
if (!entry)
return nullptr;
INCSTAT(stats.numTexturesCreated);
WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u texture", config.width, config.height,
config.layers);
return {};
}
return entry;
std::unique_ptr<AbstractFramebuffer> framebuffer;
if (config.IsRenderTarget())
{
framebuffer = g_renderer->CreateFramebuffer(texture.get(), nullptr);
if (!framebuffer)
{
WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u framebuffer", config.width, config.height,
config.layers);
return {};
}
}
INCSTAT(stats.numTexturesCreated);
return TexPoolEntry(std::move(texture), std::move(framebuffer));
}
TextureCacheBase::TexPool::iterator
@ -2170,7 +2156,7 @@ TextureCacheBase::FindMatchingTextureFromPool(const TextureConfig& config)
// As non-render-target textures are usually static, this should not matter much.
auto range = texture_pool.equal_range(config);
auto matching_iter = std::find_if(range.first, range.second, [](const auto& iter) {
return iter.first.rendertarget || iter.second.frameCount != FRAMECOUNT_INVALID;
return iter.first.IsRenderTarget() || iter.second.frameCount != FRAMECOUNT_INVALID;
});
return matching_iter != range.second ? matching_iter : texture_pool.end();
}
@ -2261,7 +2247,8 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe
}
auto config = entry->texture->GetConfig();
texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture)));
texture_pool.emplace(config,
TexPoolEntry(std::move(entry->texture), std::move(entry->framebuffer)));
// Don't delete if there's a pending EFB copy, as we need the TCacheEntry alive.
if (!entry->pending_efb_copy)
@ -2270,6 +2257,283 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe
return textures_by_address.erase(iter);
}
bool TextureCacheBase::CreateUtilityTextures()
{
constexpr TextureConfig encoding_texture_config(
EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, AbstractTextureFlag_RenderTarget);
m_efb_encoding_texture = g_renderer->CreateTexture(encoding_texture_config);
if (!m_efb_encoding_texture)
return false;
m_efb_encoding_framebuffer = g_renderer->CreateFramebuffer(m_efb_encoding_texture.get(), nullptr);
if (!m_efb_encoding_framebuffer)
return false;
if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding)
{
constexpr TextureConfig decoding_texture_config(
1024, 1024, 1, 1, 1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_ComputeImage);
m_decoding_texture = g_renderer->CreateTexture(decoding_texture_config);
if (!m_decoding_texture)
return false;
}
return true;
}
void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline =
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
dst_format, is_depth_copy, is_intensity, scale_by_half,
NeedsCopyFilterInShader(filter_coefficients)));
if (!copy_pipeline)
{
WARN_LOG(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
return;
}
const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect);
AbstractTexture* src_texture =
is_depth_copy ? g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) :
g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect);
g_renderer->BeginUtilityDrawing();
// Fill uniform buffer.
struct Uniforms
{
float src_left, src_top, src_width, src_height;
float filter_coefficients[3];
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float pixel_height;
u32 padding;
};
Uniforms uniforms;
const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle(
scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer());
const float rcp_efb_width = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBWidth());
const float rcp_efb_height = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBHeight());
uniforms.src_left = framebuffer_rect.left * rcp_efb_width;
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
uniforms.filter_coefficients[0] = filter_coefficients.upper;
uniforms.filter_coefficients[1] = filter_coefficients.middle;
uniforms.filter_coefficients[2] = filter_coefficients.lower;
uniforms.gamma_rcp = 1.0f / gamma;
uniforms.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f;
uniforms.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f;
uniforms.pixel_height = g_ActiveConfig.bCopyEFBScaled ? rcp_efb_height : 1.0f / EFB_HEIGHT;
uniforms.padding = 0;
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
// Use the copy pipeline to render the VRAM copy.
g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get());
g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect());
g_renderer->SetPipeline(copy_pipeline);
g_renderer->SetTexture(0, src_texture);
g_renderer->SetSamplerState(0, scale_by_half ? RenderState::GetLinearSamplerState() :
RenderState::GetPointSamplerState());
g_renderer->Draw(0, 3);
g_renderer->EndUtilityDrawing();
entry->texture->FinishedRendering();
}
void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToRAMPipeline(params);
if (!copy_pipeline)
{
WARN_LOG(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
return;
}
const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect);
AbstractTexture* src_texture =
params.depth ? g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) :
g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect);
g_renderer->BeginUtilityDrawing();
// Fill uniform buffer.
struct Uniforms
{
std::array<s32, 4> position_uniform;
float y_scale;
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float filter_coefficients[3];
u32 padding;
};
Uniforms encoder_params;
const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle(
scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer());
const float rcp_efb_height = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBHeight());
encoder_params.position_uniform[0] = scaled_src_rect.left;
encoder_params.position_uniform[1] = scaled_src_rect.top;
encoder_params.position_uniform[2] = static_cast<s32>(native_width);
encoder_params.position_uniform[3] = scale_by_half ? 2 : 1;
encoder_params.y_scale = y_scale;
encoder_params.gamma_rcp = 1.0f / gamma;
encoder_params.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f;
encoder_params.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f;
encoder_params.filter_coefficients[0] = filter_coefficients.upper;
encoder_params.filter_coefficients[1] = filter_coefficients.middle;
encoder_params.filter_coefficients[2] = filter_coefficients.lower;
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));
// We also linear filtering for both box filtering and downsampling higher resolutions to 1x
// TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
// complex down filtering to average all pixels and produce the correct result.
const bool linear_filter =
(scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f;
// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
const u32 render_width = bytes_per_row / sizeof(u32);
const u32 render_height = num_blocks_y;
const auto encode_rect = MathUtil::Rectangle<int>(0, 0, render_width, render_height);
// Render to GPU texture, and then copy to CPU-accessible texture.
g_renderer->SetAndDiscardFramebuffer(m_efb_encoding_framebuffer.get());
g_renderer->SetViewportAndScissor(encode_rect);
g_renderer->SetPipeline(copy_pipeline);
g_renderer->SetTexture(0, src_texture);
g_renderer->SetSamplerState(0, linear_filter ? RenderState::GetLinearSamplerState() :
RenderState::GetPointSamplerState());
g_renderer->Draw(0, 3);
dst->CopyFromTexture(m_efb_encoding_texture.get(), encode_rect, 0, 0, encode_rect);
g_renderer->EndUtilityDrawing();
// Flush if there's sufficient draws between this copy and the last.
g_vertex_manager->OnEFBCopyToRAM();
}
bool TextureCacheBase::ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted,
const void* palette, TLUTFormat format)
{
DEBUG_ASSERT(entry->texture->GetConfig().IsRenderTarget() && entry->framebuffer);
if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
ERROR_LOG(VIDEO, "Backend does not support palette conversion!");
return false;
}
g_renderer->BeginUtilityDrawing();
const u32 palette_size = unconverted->format == TextureFormat::I4 ? 32 : 512;
u32 texel_buffer_offset;
if (!g_vertex_manager->UploadTexelBuffer(palette, palette_size,
TexelBufferFormat::TEXEL_BUFFER_FORMAT_R16_UINT,
&texel_buffer_offset))
{
ERROR_LOG(VIDEO, "Texel buffer upload failed");
return false;
}
struct Uniforms
{
float multiplier;
u32 texel_buffer_offset;
u32 pad[2];
};
static_assert(std::is_standard_layout<Uniforms>::value);
Uniforms uniforms = {};
uniforms.multiplier = unconverted->format == TextureFormat::I4 ? 15.0f : 255.0f;
uniforms.texel_buffer_offset = texel_buffer_offset;
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get());
g_renderer->SetViewportAndScissor(entry->texture->GetRect());
g_renderer->SetPipeline(g_shader_cache->GetPaletteConversionPipeline(format));
g_renderer->SetTexture(1, unconverted->texture.get());
g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState());
g_renderer->Draw(0, 3);
g_renderer->EndUtilityDrawing();
entry->texture->FinishedRendering();
return true;
}
bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
u32 data_size, TextureFormat format, u32 width,
u32 height, u32 aligned_width, u32 aligned_height,
u32 row_stride, const u8* palette,
TLUTFormat palette_format)
{
const auto* info = TextureConversionShaderTiled::GetDecodingShaderInfo(format);
if (!info)
return false;
const AbstractShader* shader = g_shader_cache->GetTextureDecodingShader(format, palette_format);
if (!shader)
return false;
// Copy to GPU-visible buffer, aligned to the data type.
const u32 bytes_per_buffer_elem =
VertexManagerBase::GetTexelBufferElementSize(info->buffer_format);
// Allocate space in stream buffer, and copy texture + palette across.
u32 src_offset = 0, palette_offset = 0;
if (info->palette_size > 0)
{
if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset,
palette, info->palette_size,
TEXEL_BUFFER_FORMAT_R16_UINT, &palette_offset))
{
return false;
}
}
else
{
if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset))
return false;
}
// Set up uniforms.
struct Uniforms
{
u32 dst_width, dst_height;
u32 src_width, src_height;
u32 src_offset, src_row_stride;
u32 palette_offset, unused;
} uniforms = {width, height, aligned_width,
aligned_height, src_offset, row_stride / bytes_per_buffer_elem,
palette_offset};
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
g_renderer->SetComputeImageTexture(m_decoding_texture.get(), false, true);
auto dispatch_groups =
TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height);
g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1);
// Copy from decoding texture -> final texture
// This is because we don't want to have to create compute view for every layer
const auto copy_rect = entry->texture->GetConfig().GetMipRect(dst_level);
entry->texture->CopyRectangleFromTexture(m_decoding_texture.get(), copy_rect, 0, 0, copy_rect, 0,
dst_level);
entry->texture->FinishedRendering();
return true;
}
u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
{
const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt);
@ -2362,3 +2626,9 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const
return temp_hash;
}
}
TextureCacheBase::TexPoolEntry::TexPoolEntry(std::unique_ptr<AbstractTexture> tex,
std::unique_ptr<AbstractFramebuffer> fb)
: texture(std::move(tex)), framebuffer(std::move(fb))
{
}