Move most backend functionality to VideoCommon

This commit is contained in:
Stenzek
2019-02-15 11:59:50 +10:00
parent 933f3ba008
commit f039149198
182 changed files with 8334 additions and 15917 deletions

View File

@ -5,10 +5,13 @@
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractTexture.h"
AbstractFramebuffer::AbstractFramebuffer(AbstractTextureFormat color_format,
AbstractFramebuffer::AbstractFramebuffer(AbstractTexture* color_attachment,
AbstractTexture* depth_attachment,
AbstractTextureFormat color_format,
AbstractTextureFormat depth_format, u32 width, u32 height,
u32 layers, u32 samples)
: m_color_format(color_format), m_depth_format(depth_format), m_width(width), m_height(height),
: m_color_attachment(color_attachment), m_depth_attachment(depth_attachment),
m_color_format(color_format), m_depth_format(depth_format), m_width(width), m_height(height),
m_layers(layers), m_samples(samples)
{
}
@ -26,7 +29,7 @@ bool AbstractFramebuffer::ValidateConfig(const AbstractTexture* color_attachment
// MSAA textures are not supported with mip levels on most backends, and it simplifies our
// handling of framebuffers.
auto CheckAttachment = [](const AbstractTexture* tex) {
return tex->GetConfig().rendertarget && tex->GetConfig().levels == 1;
return tex->GetConfig().IsRenderTarget() && tex->GetConfig().levels == 1;
};
if ((color_attachment && !CheckAttachment(color_attachment)) ||
(depth_attachment && !CheckAttachment(depth_attachment)))

View File

@ -18,13 +18,16 @@ class AbstractTexture;
class AbstractFramebuffer
{
public:
AbstractFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format,
AbstractFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment,
AbstractTextureFormat color_format, AbstractTextureFormat depth_format,
u32 width, u32 height, u32 layers, u32 samples);
virtual ~AbstractFramebuffer();
static bool ValidateConfig(const AbstractTexture* color_attachment,
const AbstractTexture* depth_attachment);
AbstractTexture* GetColorAttachment() const { return m_color_attachment; }
AbstractTexture* GetDepthAttachment() const { return m_depth_attachment; }
AbstractTextureFormat GetColorFormat() const { return m_color_format; }
AbstractTextureFormat GetDepthFormat() const { return m_depth_format; }
bool HasColorBuffer() const { return m_color_format != AbstractTextureFormat::Undefined; }
@ -36,6 +39,8 @@ public:
MathUtil::Rectangle<int> GetRect() const;
protected:
AbstractTexture* m_color_attachment;
AbstractTexture* m_depth_attachment;
AbstractTextureFormat m_color_format;
AbstractTextureFormat m_depth_format;
u32 m_width;

View File

@ -45,24 +45,7 @@ struct AbstractPipelineConfig
RasterizationState rasterization_state;
DepthState depth_state;
BlendingState blending_state;
// Packed description of the framebuffer a pipeline targets. Being a union, every field below
// shares storage with the 32-bit `hex` word, so comparison and assignment are done on `hex`.
// NOTE(review): BitField arguments are presumably <first bit, bit count, value type> - confirm
// against the BitField definition.
union FramebufferState
{
BitField<0, 8, AbstractTextureFormat> color_texture_format;
BitField<8, 8, AbstractTextureFormat> depth_texture_format;
BitField<16, 8, u32> samples;
BitField<24, 1, u32> per_sample_shading;
// Two states are equal when their whole packed words are equal.
bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; }
bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; }
// Copies the packed word, and with it every field.
FramebufferState& operator=(const FramebufferState& rhs)
{
hex = rhs.hex;
return *this;
}
u32 hex;
} framebuffer_state;
FramebufferState framebuffer_state;
AbstractPipelineUsage usage;

View File

@ -20,8 +20,16 @@ public:
virtual ~AbstractStagingTexture();
const TextureConfig& GetConfig() const { return m_config; }
u32 GetWidth() const { return m_config.width; }
u32 GetHeight() const { return m_config.height; }
u32 GetLevels() const { return m_config.levels; }
u32 GetLayers() const { return m_config.layers; }
u32 GetSamples() const { return m_config.samples; }
AbstractTextureFormat GetFormat() const { return m_config.format; }
MathUtil::Rectangle<int> GetRect() const { return m_config.GetRect(); }
StagingTextureType GetType() const { return m_type; }
size_t GetTexelSize() const { return m_texel_size; }
bool IsMapped() const { return m_map_pointer != nullptr; }
char* GetMappedPointer() const { return m_map_pointer; }
size_t GetMappedStride() const { return m_map_stride; }

View File

@ -15,6 +15,10 @@ AbstractTexture::AbstractTexture(const TextureConfig& c) : m_config(c)
{
}
// Default implementation of the FinishedRendering() hint: intentionally does nothing.
// The method is declared virtual, so a backend can override it when it needs to react.
void AbstractTexture::FinishedRendering()
{
}
bool AbstractTexture::Save(const std::string& filename, unsigned int level)
{
// We can't dump compressed textures currently (it would mean drawing them to a RGBA8
@ -30,7 +34,7 @@ bool AbstractTexture::Save(const std::string& filename, unsigned int level)
// Use a temporary staging texture for the download. Certainly not optimal,
// but this is not a frequently-executed code path..
TextureConfig readback_texture_config(level_width, level_height, 1, 1, 1,
AbstractTextureFormat::RGBA8, false);
AbstractTextureFormat::RGBA8, 0);
auto readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, readback_texture_config);
if (!readback_texture)
@ -84,7 +88,24 @@ bool AbstractTexture::IsStencilFormat(AbstractTextureFormat format)
return format == AbstractTextureFormat::D24_S8 || format == AbstractTextureFormat::D32F_S8;
}
size_t AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length)
// Returns the colour format paired with a depth format:
//   D16                     -> R16
//   D24_S8, D32F, D32F_S8   -> R32F
// Any other format is returned unchanged.
AbstractTextureFormat AbstractTexture::GetColorFormatForDepthFormat(AbstractTextureFormat format)
{
  if (format == AbstractTextureFormat::D16)
    return AbstractTextureFormat::R16;

  // TODO: Mapping D24_S8 to R32F is incorrect.
  const bool maps_to_r32f = format == AbstractTextureFormat::D24_S8 ||
                            format == AbstractTextureFormat::D32F ||
                            format == AbstractTextureFormat::D32F_S8;
  return maps_to_r32f ? AbstractTextureFormat::R32F : format;
}
u32 AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length)
{
switch (format)
{
@ -111,7 +132,7 @@ size_t AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u
}
}
size_t AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format)
u32 AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format)
{
switch (format)
{
@ -138,6 +159,21 @@ size_t AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format)
}
}
// Returns 4 for the DXT1/DXT3/DXT5/BPTC formats and 1 for every other format.
u32 AbstractTexture::GetBlockSizeForFormat(AbstractTextureFormat format)
{
  const bool uses_4_blocks =
      format == AbstractTextureFormat::DXT1 || format == AbstractTextureFormat::DXT3 ||
      format == AbstractTextureFormat::DXT5 || format == AbstractTextureFormat::BPTC;
  return uses_4_blocks ? 4 : 1;
}
const TextureConfig& AbstractTexture::GetConfig() const
{
return m_config;

View File

@ -21,28 +21,33 @@ public:
const MathUtil::Rectangle<int>& src_rect, u32 src_layer,
u32 src_level, const MathUtil::Rectangle<int>& dst_rect,
u32 dst_layer, u32 dst_level) = 0;
virtual void ScaleRectangleFromTexture(const AbstractTexture* source,
const MathUtil::Rectangle<int>& srcrect,
const MathUtil::Rectangle<int>& dstrect) = 0;
virtual void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& rect,
u32 layer, u32 level) = 0;
virtual void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
size_t buffer_size) = 0;
// Hints to the backend that we have finished rendering to this texture, and it will be used
// as a shader resource and sampled. For Vulkan, this transitions the image layout.
virtual void FinishedRendering();
u32 GetWidth() const { return m_config.width; }
u32 GetHeight() const { return m_config.height; }
u32 GetLevels() const { return m_config.levels; }
u32 GetLayers() const { return m_config.layers; }
u32 GetSamples() const { return m_config.samples; }
AbstractTextureFormat GetFormat() const { return m_config.format; }
MathUtil::Rectangle<int> GetRect() const { return m_config.GetRect(); }
MathUtil::Rectangle<int> GetMipRect(u32 level) const { return m_config.GetMipRect(level); }
bool IsMultisampled() const { return m_config.IsMultisampled(); }
bool Save(const std::string& filename, unsigned int level);
static bool IsCompressedFormat(AbstractTextureFormat format);
static bool IsDepthFormat(AbstractTextureFormat format);
static bool IsStencilFormat(AbstractTextureFormat format);
static size_t CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length);
static size_t GetTexelSizeForFormat(AbstractTextureFormat format);
static AbstractTextureFormat GetColorFormatForDepthFormat(AbstractTextureFormat format);
static u32 CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length);
static u32 GetTexelSizeForFormat(AbstractTextureFormat format);
static u32 GetBlockSizeForFormat(AbstractTextureFormat format);
const TextureConfig& GetConfig() const;

View File

@ -5,8 +5,10 @@
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/BPFunctions.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/VertexManagerBase.h"
@ -51,8 +53,10 @@ void SetScissor()
bpmem.scissorBR.x - xoff + 1, bpmem.scissorBR.y - yoff + 1);
native_rc.ClampUL(0, 0, EFB_WIDTH, EFB_HEIGHT);
TargetRectangle target_rc = g_renderer->ConvertEFBRectangle(native_rc);
g_renderer->SetScissorRect(target_rc);
auto target_rc = g_renderer->ConvertEFBRectangle(native_rc);
auto converted_rc =
g_renderer->ConvertFramebufferRectangle(target_rc, g_renderer->GetCurrentFramebuffer());
g_renderer->SetScissorRect(converted_rc);
}
void SetViewport()
@ -122,6 +126,21 @@ void SetViewport()
far_depth = 1.0f - min_depth;
}
// Clamp to size if oversized not supported. Required for D3D.
if (!g_ActiveConfig.backend_info.bSupportsOversizedViewports)
{
const float max_width = static_cast<float>(g_renderer->GetCurrentFramebuffer()->GetWidth());
const float max_height = static_cast<float>(g_renderer->GetCurrentFramebuffer()->GetHeight());
x = MathUtil::Clamp(x, 0.0f, max_width - 1.0f);
y = MathUtil::Clamp(y, 0.0f, max_height - 1.0f);
width = MathUtil::Clamp(width, 1.0f, max_width - x);
height = MathUtil::Clamp(height, 1.0f, max_height - y);
}
// Lower-left flip.
if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin)
y = static_cast<float>(g_renderer->GetCurrentFramebuffer()->GetHeight()) - y - height;
g_renderer->SetViewport(x, y, width, height, near_depth, far_depth);
}
@ -188,8 +207,6 @@ void ClearScreen(const EFBRectangle& rc)
void OnPixelFormatChange()
{
int convtype = -1;
// TODO : Check for Z compression format change
// When using 16bit Z, the game may enable a special compression format which we need to handle
// If we don't, Z values will be completely screwed up, currently only Star Wars:RS2 uses that.
@ -205,58 +222,74 @@ void OnPixelFormatChange()
auto old_format = g_renderer->GetPrevPixelFormat();
auto new_format = bpmem.zcontrol.pixel_format;
g_renderer->StorePixelFormat(new_format);
DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast<int>(new_format),
static_cast<int>(bpmem.zcontrol.zformat));
// no need to reinterpret pixel data in these cases
if (new_format == old_format || old_format == PEControl::INVALID_FMT)
goto skip;
return;
// Check for pixel format changes
switch (old_format)
{
case PEControl::RGB8_Z24:
case PEControl::Z24:
{
// Z24 and RGB8_Z24 are treated equal, so just return in this case
if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24)
goto skip;
return;
if (new_format == PEControl::RGBA6_Z24)
convtype = 0;
{
g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB8ToRGBA6);
return;
}
else if (new_format == PEControl::RGB565_Z16)
convtype = 1;
break;
{
g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB8ToRGB565);
return;
}
}
break;
case PEControl::RGBA6_Z24:
{
if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24)
convtype = 2;
{
g_renderer->ReinterpretPixelData(EFBReinterpretType::RGBA6ToRGB8);
return;
}
else if (new_format == PEControl::RGB565_Z16)
convtype = 3;
break;
{
g_renderer->ReinterpretPixelData(EFBReinterpretType::RGBA6ToRGB565);
return;
}
}
break;
case PEControl::RGB565_Z16:
{
if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24)
convtype = 4;
{
g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB565ToRGB8);
return;
}
else if (new_format == PEControl::RGBA6_Z24)
convtype = 5;
break;
{
g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB565ToRGBA6);
return;
}
}
break;
default:
break;
}
if (convtype == -1)
{
ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d", static_cast<int>(old_format),
static_cast<int>(new_format));
goto skip;
}
g_renderer->ReinterpretPixelData(convtype);
skip:
DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast<int>(new_format),
static_cast<int>(bpmem.zcontrol.zformat));
g_renderer->StorePixelFormat(new_format);
ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d", static_cast<int>(old_format),
static_cast<int>(new_format));
}
void SetInterlacingMode(const BPCmd& bp)
@ -286,4 +319,4 @@ void SetInterlacingMode(const BPCmd& bp)
break;
}
}
};
}; // namespace BPFunctions

View File

@ -10,11 +10,11 @@ add_library(videocommon
BPStructs.cpp
CPMemory.cpp
CommandProcessor.cpp
Debugger.cpp
DriverDetails.cpp
Fifo.cpp
FPSCounter.cpp
FramebufferManagerBase.cpp
FramebufferManager.cpp
FramebufferShaderGen.cpp
GeometryShaderGen.cpp
GeometryShaderManager.cpp
HiresTextures.cpp

View File

@ -1,163 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <string>
#include "Common/FileUtil.h"
#include "Common/StringUtil.h"
#include "Common/Thread.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/VideoConfig.h"
// Debugger instance whose OnPause()/OnContinue() hooks are called by GFXDebuggerCheckAndPause().
GFXDebuggerBase* g_pdebugger = nullptr;
// NOTE(review): the flags below are `volatile`, which by itself gives no inter-thread
// synchronization in C++; std::atomic would be needed if they are shared between threads.
volatile bool GFXDebuggerPauseFlag =
false; // if true, the GFX thread will be spin locked until it's false again
volatile PauseEvent GFXDebuggerToPauseAtNext =
NOT_PAUSE; // Event which will trigger spin locking the GFX thread
volatile int GFXDebuggerEventToPauseCount =
0; // Number of events to wait for until GFX thread will be paused
// Currently a no-op: the whole body is a commented-out D3D-specific implementation that is
// kept only as a reference for a future backend-independent version.
void GFXDebuggerUpdateScreen()
{
// TODO: Implement this in a backend-independent way
/* // update screen
if (D3D::bFrameInProgress)
{
D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface());
D3D::dev->SetDepthStencilSurface(nullptr);
D3D::dev->StretchRect(FramebufferManager::GetEFBColorRTSurface(), nullptr,
D3D::GetBackBufferSurface(), nullptr,
D3DTEXF_LINEAR);
D3D::dev->EndScene();
D3D::dev->Present(nullptr, nullptr, nullptr, nullptr);
D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface());
D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface());
D3D::dev->BeginScene();
}
else
{
D3D::dev->EndScene();
D3D::dev->Present(nullptr, nullptr, nullptr, nullptr);
D3D::dev->BeginScene();
}*/
}
// GFX thread
// If a pause is requested, notifies the debugger, then polls GFXDebuggerPauseFlag (sleeping
// 5 between polls via Common::SleepCurrentThread) until it is cleared, and finally notifies
// the debugger that execution continues. When `update` is set, GFXDebuggerUpdateScreen() is
// called on every poll iteration.
void GFXDebuggerCheckAndPause(bool update)
{
  if (!GFXDebuggerPauseFlag)
    return;

  g_pdebugger->OnPause();
  for (; GFXDebuggerPauseFlag; Common::SleepCurrentThread(5))
  {
    if (update)
      GFXDebuggerUpdateScreen();
  }
  g_pdebugger->OnContinue();
}
// GFX thread
// Requests a pause and immediately waits on it: disarms the pending pause event, raises the
// pause flag, then blocks in GFXDebuggerCheckAndPause() until the flag is cleared again.
void GFXDebuggerToPause(bool update)
{
GFXDebuggerToPauseAtNext = NOT_PAUSE;
GFXDebuggerPauseFlag = true;
GFXDebuggerCheckAndPause(update);
}
// Clears the pause flag, which lets the polling loop in GFXDebuggerCheckAndPause() exit.
void ContinueGFXDebugger()
{
GFXDebuggerPauseFlag = false;
}
// Writes "<path>dump_ps.txt" containing a header describing how destination alpha is currently
// handled. Emission of the generated shader source itself is disabled (the
// GeneratePixelShaderCode calls were commented out), so only the header lines are written.
void GFXDebuggerBase::DumpPixelShader(const std::string& path)
{
  const std::string filename = StringFromFormat("%sdump_ps.txt", path.c_str());

  const bool dst_alpha_active = bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
                                bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

  std::string output;
  if (dst_alpha_active && g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
  {
    // Would be followed by the DSTALPHA_DUAL_SOURCE_BLEND shader.
    output = "Using dual source blending for destination alpha:\n";
  }
  else if (dst_alpha_active)
  {
    // Would be followed by the DSTALPHA_NONE shader, then the DSTALPHA_ALPHA_PASS shader.
    output = "Using two passes for emulating destination alpha:\n";
    output += "\n\nDestination alpha pass shader:\n";
  }
  else
  {
    // Would be followed by the DSTALPHA_NONE shader.
    output = "Destination alpha disabled:\n";
  }

  File::CreateEmptyFile(filename);
  File::WriteStringToFile(output, filename);
}
// Creates an empty "<path>dump_vs.txt". Writing the generated vertex shader source is disabled
// (the call is commented out below), so the file is left empty.
void GFXDebuggerBase::DumpVertexShader(const std::string& path)
{
const std::string filename = StringFromFormat("%sdump_vs.txt", path.c_str());
File::CreateEmptyFile(filename);
/// File::WriteStringToFile(GenerateVertexShaderCode(g_nativeVertexFmt->m_components,
/// g_ActiveConfig.backend_info.APIType), filename);
}
// The remaining Dump* functions are unimplemented placeholders: each one ignores `path` and
// returns without writing anything.
void GFXDebuggerBase::DumpPixelShaderConstants(const std::string& path)
{
// TODO
}
void GFXDebuggerBase::DumpVertexShaderConstants(const std::string& path)
{
// TODO
}
void GFXDebuggerBase::DumpTextures(const std::string& path)
{
// TODO
}
void GFXDebuggerBase::DumpFrameBuffer(const std::string& path)
{
// TODO
}
void GFXDebuggerBase::DumpGeometry(const std::string& path)
{
// TODO
}
void GFXDebuggerBase::DumpVertexDecl(const std::string& path)
{
// TODO
}
void GFXDebuggerBase::DumpMatrices(const std::string& path)
{
// TODO
}
void GFXDebuggerBase::DumpStats(const std::string& path)
{
// TODO
}

View File

@ -1,83 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <string>
// Base class for a graphics debugger front-end. It offers overridable pause/continue
// notifications and a set of non-virtual dump helpers that each take an output path.
class GFXDebuggerBase
{
public:
  virtual ~GFXDebuggerBase() = default;

  // Notification hooks; the defaults do nothing.
  // if paused, debugging functions can be enabled
  virtual void OnPause() {}
  virtual void OnContinue() {}

  // Shader dumps.
  void DumpPixelShader(const std::string& path);
  void DumpVertexShader(const std::string& path);
  void DumpPixelShaderConstants(const std::string& path);
  void DumpVertexShaderConstants(const std::string& path);

  // Resource and state dumps.
  void DumpTextures(const std::string& path);
  void DumpFrameBuffer(const std::string& path);
  void DumpGeometry(const std::string& path);
  void DumpVertexDecl(const std::string& path);
  void DumpMatrices(const std::string& path);
  void DumpStats(const std::string& path);
};
// Bit flags naming the events the debugger can pause on. Each enumerator occupies its own bit,
// so several events can be combined into one mask.
enum PauseEvent
{
  NOT_PAUSE = 0x0000,
  NEXT_FRAME = 0x0001,
  NEXT_FLUSH = 0x0002,
  NEXT_PIXEL_SHADER_CHANGE = 0x0004,
  NEXT_VERTEX_SHADER_CHANGE = 0x0008,
  NEXT_TEXTURE_CHANGE = 0x0010,
  NEXT_NEW_TEXTURE = 0x0020,
  NEXT_XFB_CMD = 0x0040,      // TODO
  NEXT_EFB_CMD = 0x0080,      // TODO
  NEXT_MATRIX_CMD = 0x0100,   // TODO
  NEXT_VERTEX_CMD = 0x0200,   // TODO
  NEXT_TEXTURE_CMD = 0x0400,  // TODO
  NEXT_LIGHT_CMD = 0x0800,    // TODO
  NEXT_FOG_CMD = 0x1000,      // TODO
  NEXT_SET_TLUT = 0x2000,     // TODO
  NEXT_ERROR = 0x4000,        // TODO
};
// Shared debugger state (declarations only; the definitions live in a source file).
extern GFXDebuggerBase* g_pdebugger;
// Pause request flag, the event mask to pause on, and the countdown used by the macros below.
extern volatile bool GFXDebuggerPauseFlag;
extern volatile PauseEvent GFXDebuggerToPauseAtNext;
extern volatile int GFXDebuggerEventToPauseCount;
// Free functions controlling the pause state.
void ContinueGFXDebugger();
void GFXDebuggerCheckAndPause(bool update);
void GFXDebuggerToPause(bool update);
void GFXDebuggerUpdateScreen();
// Calls GFXDebuggerToPause(update) when either
//  - `event` is set in GFXDebuggerToPauseAtNext and the pre-decremented
//    GFXDebuggerEventToPauseCount is <= 0, or
//  - GFXDebuggerPauseFlag is already set.
// The countdown is decremented each time the macro runs with a matching event.
#define GFX_DEBUGGER_PAUSE_AT(event, update) \
{ \
if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \
GFXDebuggerPauseFlag) \
GFXDebuggerToPause(update); \
}
// Same condition as GFX_DEBUGGER_PAUSE_AT, but runs `dumpfunc` before pausing.
#define GFX_DEBUGGER_PAUSE_LOG_AT(event, update, dumpfunc) \
{ \
if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \
GFXDebuggerPauseFlag) \
{ \
{dumpfunc}; \
GFXDebuggerToPause(update); \
} \
}
// Runs `dumpfunc` whenever `event` is set in GFXDebuggerToPauseAtNext. It never pauses and
// does not touch the countdown.
#define GFX_DEBUGGER_LOG_AT(event, dumpfunc) \
{ \
if ((GFXDebuggerToPauseAtNext & event)) \
{ \
{dumpfunc}; \
} \
}

View File

@ -0,0 +1,764 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoCommon/FramebufferManager.h"
#include <memory>
#include "VideoCommon/FramebufferShaderGen.h"
#include "VideoCommon/VertexManagerBase.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/VideoConfig.h"
// Maximum number of pixels poked in one batch * 6
constexpr size_t MAX_POKE_VERTICES = 32768;
std::unique_ptr<FramebufferManager> g_framebuffer_manager;
FramebufferManager::FramebufferManager() = default;
// Releases everything the manager created: all pipeline groups first, then the readback
// framebuffer, and the EFB framebuffer last.
FramebufferManager::~FramebufferManager()
{
DestroyClearPipelines();
DestroyPokePipelines();
DestroyConversionPipelines();
DestroyReadbackPipelines();
DestroyReadbackFramebuffer();
DestroyEFBFramebuffer();
}
// Creates the EFB and readback framebuffers and compiles every pipeline group, in that order.
// Stops at the first step that fails, reports it through PanicAlert and returns false.
// Returns true only when every step succeeded.
bool FramebufferManager::Initialize()
{
  if (!CreateEFBFramebuffer())
  {
    PanicAlert("Failed to create EFB framebuffer");
  }
  else if (!CreateReadbackFramebuffer())
  {
    PanicAlert("Failed to create EFB readback framebuffer");
  }
  else if (!CompileReadbackPipelines())
  {
    PanicAlert("Failed to compile EFB readback pipelines");
  }
  else if (!CompileConversionPipelines())
  {
    PanicAlert("Failed to compile EFB conversion pipelines");
  }
  else if (!CompileClearPipelines())
  {
    PanicAlert("Failed to compile EFB clear pipelines");
  }
  else if (!CompilePokePipelines())
  {
    PanicAlert("Failed to compile EFB poke pipelines");
  }
  else
  {
    return true;
  }

  return false;
}
// Rebuilds the EFB and readback framebuffers. Pending pokes are flushed and the peek cache is
// invalidated before the old objects are destroyed. A failed rebuild is reported via PanicAlert.
void FramebufferManager::RecreateEFBFramebuffer()
{
  FlushEFBPokes();
  InvalidatePeekCache();

  DestroyReadbackFramebuffer();
  DestroyEFBFramebuffer();

  // The readback framebuffer is only attempted when the EFB framebuffer was created.
  const bool recreated = CreateEFBFramebuffer() && CreateReadbackFramebuffer();
  if (!recreated)
  {
    PanicAlert("Failed to recreate EFB framebuffer");
  }
}
// Destroys and recompiles every pipeline group owned by the manager. Compilation stops at the
// first group that fails, and the failure is reported via PanicAlert.
void FramebufferManager::RecompileShaders()
{
  DestroyPokePipelines();
  DestroyClearPipelines();
  DestroyConversionPipelines();
  DestroyReadbackPipelines();

  const bool compiled = CompileReadbackPipelines() && CompileConversionPipelines() &&
                        CompileClearPipelines() && CompilePokePipelines();
  if (!compiled)
  {
    PanicAlert("Failed to recompile EFB pipelines");
  }
}
// Returns the host texture format used for the EFB colour buffer. It is always RGBA8,
// regardless of the pixel format the game has selected.
AbstractTextureFormat FramebufferManager::GetEFBColorFormat()
{
// The EFB can be set to different pixel formats by the game through the
// BPMEM_ZCOMPARE register (which should probably have a different name).
// They are:
// - 24-bit RGB (8-bit components) with 24-bit Z
// - 24-bit RGBA (6-bit components) with 24-bit Z
// - Multisampled 16-bit RGB (5-6-5 format) with 16-bit Z
// We only use one EFB format here: 32-bit ARGB with 32-bit Z.
// Multisampling depends on user settings.
// The distinction becomes important for certain operations, i.e. the
// alpha channel should be ignored if the EFB does not have one.
return AbstractTextureFormat::RGBA8;
}
// Returns the host texture format used for the EFB depth buffer: D32F normally, or D24_S8 on
// drivers flagged with BUG_BROKEN_D32F_CLEAR.
AbstractTextureFormat FramebufferManager::GetEFBDepthFormat()
{
  // 32-bit depth clears are broken in the Adreno Vulkan driver, and have no effect.
  // Falling back to D24_S8 works around this at the cost of accuracy. The depth is still
  // resolved to a R32F texture, as there is no 24-bit format.
  return DriverDetails::HasBug(DriverDetails::BUG_BROKEN_D32F_CLEAR) ?
             AbstractTextureFormat::D24_S8 :
             AbstractTextureFormat::D32F;
}
// Number of EFB texture layers: one when stereo mode is off, two for any other stereo mode.
static u32 CalculateEFBLayers()
{
  if (g_ActiveConfig.stereo_mode == StereoMode::Off)
    return 1;

  return 2;
}
// Builds the texture configuration for the EFB colour buffer: render-target sized, one mip
// level, layer count from the stereo setting, sample count from the multisampling setting.
TextureConfig FramebufferManager::GetEFBColorTextureConfig()
{
  const auto target_width = g_renderer->GetTargetWidth();
  const auto target_height = g_renderer->GetTargetHeight();
  return TextureConfig(target_width, target_height, 1, CalculateEFBLayers(),
                       g_ActiveConfig.iMultisamples, GetEFBColorFormat(),
                       AbstractTextureFlag_RenderTarget);
}
// Builds the texture configuration for the EFB depth buffer. It matches the colour buffer
// configuration except that the format comes from GetEFBDepthFormat().
TextureConfig FramebufferManager::GetEFBDepthTextureConfig()
{
  const auto target_width = g_renderer->GetTargetWidth();
  const auto target_height = g_renderer->GetTargetHeight();
  return TextureConfig(target_width, target_height, 1, CalculateEFBLayers(),
                       g_ActiveConfig.iMultisamples, GetEFBDepthFormat(),
                       AbstractTextureFlag_RenderTarget);
}
// Describes the current EFB framebuffer for pipeline creation: the formats of the live colour
// and depth textures, the colour texture's sample count, and whether per-sample shading
// applies (multisampled EFB with SSAA enabled).
FramebufferState FramebufferManager::GetEFBFramebufferState() const
{
  FramebufferState state = {};
  state.color_texture_format = m_efb_color_texture->GetFormat();
  state.depth_texture_format = m_efb_depth_texture->GetFormat();
  state.samples = m_efb_color_texture->GetSamples();
  state.per_sample_shading = IsEFBMultisampled() && g_ActiveConfig.bSSAA;
  return state;
}
// Creates the EFB textures and framebuffers. With multisampling enabled it also creates the
// single-sample resolve targets. Finally binds and clears the EFB framebuffer.
// Returns false as soon as any texture or framebuffer cannot be created. Objects created up to
// that point stay assigned to their members.
bool FramebufferManager::CreateEFBFramebuffer()
{
const TextureConfig efb_color_texture_config = GetEFBColorTextureConfig();
const TextureConfig efb_depth_texture_config = GetEFBDepthTextureConfig();
// We need a second texture to swap with for changing pixel formats
m_efb_color_texture = g_renderer->CreateTexture(efb_color_texture_config);
m_efb_depth_texture = g_renderer->CreateTexture(efb_depth_texture_config);
m_efb_convert_color_texture = g_renderer->CreateTexture(efb_color_texture_config);
if (!m_efb_color_texture || !m_efb_depth_texture || !m_efb_convert_color_texture)
return false;
// Both framebuffers attach the same depth texture; only the colour attachment differs.
m_efb_framebuffer =
g_renderer->CreateFramebuffer(m_efb_color_texture.get(), m_efb_depth_texture.get());
m_efb_convert_framebuffer =
g_renderer->CreateFramebuffer(m_efb_convert_color_texture.get(), m_efb_depth_texture.get());
if (!m_efb_framebuffer || !m_efb_convert_framebuffer)
return false;
// Create resolved textures if MSAA is on
if (g_ActiveConfig.MultisamplingEnabled())
{
// Single-sample colour resolve target, created with no texture flags.
m_efb_resolve_color_texture = g_renderer->CreateTexture(
TextureConfig(efb_color_texture_config.width, efb_color_texture_config.height, 1,
efb_color_texture_config.layers, 1, efb_color_texture_config.format, 0));
// Single-sample depth resolve target. It uses the colour format matching the depth format
// and is a render target, because depth is resolved by drawing into it.
m_efb_depth_resolve_texture = g_renderer->CreateTexture(TextureConfig(
efb_depth_texture_config.width, efb_depth_texture_config.height, 1,
efb_depth_texture_config.layers, 1,
AbstractTexture::GetColorFormatForDepthFormat(efb_depth_texture_config.format),
AbstractTextureFlag_RenderTarget));
if (!m_efb_resolve_color_texture || !m_efb_depth_resolve_texture)
return false;
m_efb_depth_resolve_framebuffer =
g_renderer->CreateFramebuffer(m_efb_depth_resolve_texture.get(), nullptr);
if (!m_efb_depth_resolve_framebuffer)
return false;
}
// Clear the renderable textures out.
// Colour is cleared to all zeros; depth to 1.0 when the backend supports a reversed depth
// range and to 0.0 otherwise.
g_renderer->SetAndClearFramebuffer(
m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}},
g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : 0.0f);
return true;
}
// Releases every EFB framebuffer and texture created by CreateEFBFramebuffer(). Each
// framebuffer is reset before the textures it was created from.
void FramebufferManager::DestroyEFBFramebuffer()
{
m_efb_framebuffer.reset();
m_efb_convert_framebuffer.reset();
m_efb_color_texture.reset();
m_efb_convert_color_texture.reset();
m_efb_depth_texture.reset();
m_efb_resolve_color_texture.reset();
m_efb_depth_resolve_framebuffer.reset();
m_efb_depth_resolve_texture.reset();
}
// Makes the EFB framebuffer the renderer's current framebuffer.
void FramebufferManager::BindEFBFramebuffer()
{
g_renderer->SetFramebuffer(m_efb_framebuffer.get());
}
// Returns a texture holding the EFB colour contents that is ready to be sampled.
// Without multisampling this is the EFB colour texture itself. With multisampling, `region`
// (clamped to the EFB bounds) is resolved layer by layer into the resolve texture, which is
// then returned.
AbstractTexture* FramebufferManager::ResolveEFBColorTexture(const MathUtil::Rectangle<int>& region)
{
  if (IsEFBMultisampled())
  {
    // It's not valid to resolve an out-of-range rectangle.
    MathUtil::Rectangle<int> resolve_rc = region;
    resolve_rc.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight());
    resolve_rc = g_renderer->ConvertFramebufferRectangle(resolve_rc, m_efb_framebuffer.get());

    // Resolve every layer into the already-created resolve texture.
    for (u32 i = 0; i < GetEFBLayers(); i++)
      m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), resolve_rc, i, 0);

    m_efb_resolve_color_texture->FinishedRendering();
    return m_efb_resolve_color_texture.get();
  }

  // Multisampling is off: the normal EFB texture can be used directly.
  m_efb_color_texture->FinishedRendering();
  return m_efb_color_texture.get();
}
// Returns a texture holding the EFB depth contents that is ready to be sampled.
// Without multisampling this is the EFB depth texture itself. With multisampling, depth is
// resolved by drawing with the depth-resolve pipeline into the depth resolve texture, limited
// to `region` (clamped to the EFB bounds).
AbstractTexture* FramebufferManager::ResolveEFBDepthTexture(const MathUtil::Rectangle<int>& region)
{
if (!IsEFBMultisampled())
{
m_efb_depth_texture->FinishedRendering();
return m_efb_depth_texture.get();
}
// It's not valid to resolve an out-of-range rectangle.
MathUtil::Rectangle<int> clamped_region = region;
clamped_region.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight());
clamped_region = g_renderer->ConvertFramebufferRectangle(clamped_region, m_efb_framebuffer.get());
// The multisampled depth texture is about to be sampled by the resolve draw.
m_efb_depth_texture->FinishedRendering();
g_renderer->BeginUtilityDrawing();
g_renderer->SetAndDiscardFramebuffer(m_efb_depth_resolve_framebuffer.get());
g_renderer->SetPipeline(m_efb_depth_resolve_pipeline.get());
g_renderer->SetTexture(0, m_efb_depth_texture.get());
// Point sampling: depth values are read as-is, not filtered.
g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
g_renderer->SetViewportAndScissor(clamped_region);
// Three vertices and no vertex data; presumably a single screen-covering triangle - confirm
// against the screen-quad vertex shader.
g_renderer->Draw(0, 3);
m_efb_depth_resolve_texture->FinishedRendering();
g_renderer->EndUtilityDrawing();
return m_efb_depth_resolve_texture.get();
}
// Reinterprets the EFB colour contents after a pixel format change.
//
// The current colour texture is drawn through the conversion pipeline selected by `convtype`
// into the secondary ("convert") framebuffer. The two colour textures and framebuffers are
// then swapped, so later rendering targets the converted image.
//
// Returns false, without touching any state, when no pipeline exists for `convtype`.
// Returns true otherwise.
bool FramebufferManager::ReinterpretPixelData(EFBReinterpretType convtype)
{
  const auto& pipeline = m_format_conversion_pipelines[static_cast<u32>(convtype)];
  if (!pipeline)
    return false;

  // Draw to the secondary framebuffer.
  // It is bound WITHOUT discarding: the convert framebuffer is created with the same depth
  // texture as the main EFB framebuffer (see CreateEFBFramebuffer), so discarding the whole
  // framebuffer would also throw away the EFB depth contents, which must survive a format
  // change. Should this ever hurt performance (e.g. on tiling GPUs), discarding only the colour
  // attachment would be the thing to look at.
  m_efb_color_texture->FinishedRendering();
  g_renderer->BeginUtilityDrawing();
  g_renderer->SetFramebuffer(m_efb_convert_framebuffer.get());
  g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect());
  g_renderer->SetPipeline(pipeline.get());
  g_renderer->SetTexture(0, m_efb_color_texture.get());
  g_renderer->Draw(0, 3);

  // And swap the framebuffers around, so we do new drawing to the converted framebuffer.
  std::swap(m_efb_color_texture, m_efb_convert_color_texture);
  std::swap(m_efb_framebuffer, m_efb_convert_framebuffer);
  g_renderer->EndUtilityDrawing();
  return true;
}
// Builds one utility pipeline per EFB reinterpret type. Each uses a freshly generated
// format-conversion pixel shader together with the shared screen-quad vertex shader.
// Returns false on the first shader or pipeline that fails to be created. Pipelines created
// before the failure remain stored.
bool FramebufferManager::CompileConversionPipelines()
{
for (u32 i = 0; i < NUM_EFB_REINTERPRET_TYPES; i++)
{
// The generated shader depends on the conversion type and the EFB sample count.
std::unique_ptr<AbstractShader> pixel_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel, FramebufferShaderGen::GenerateFormatConversionShader(
static_cast<EFBReinterpretType>(i), GetEFBSamples()));
if (!pixel_shader)
return false;
AbstractPipelineConfig config = {};
config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader();
// A geometry shader is only attached when the EFB is in stereo mode.
config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetTexcoordGeometryShader() : nullptr;
config.pixel_shader = pixel_shader.get();
config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
config.depth_state = RenderState::GetNoDepthTestingDepthState();
config.blending_state = RenderState::GetNoBlendingBlendState();
config.framebuffer_state = GetEFBFramebufferState();
config.usage = AbstractPipelineUsage::Utility;
m_format_conversion_pipelines[i] = g_renderer->CreatePipeline(config);
if (!m_format_conversion_pipelines[i])
return false;
}
return true;
}
void FramebufferManager::DestroyConversionPipelines()
{
for (auto& pipeline : m_format_conversion_pipelines)
pipeline.reset();
}
// Copies the current EFB colour contents into the colour readback (staging) texture and marks
// that texture valid. When the EFB scale is not 1 the image is first drawn into the colour
// copy framebuffer with linear filtering. Always returns true.
bool FramebufferManager::PopulateColorReadbackTexture()
{
g_vertex_manager->OnCPUEFBAccess();
// Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on.
AbstractTexture* src_texture =
ResolveEFBColorTexture(MathUtil::Rectangle<int>(0, 0, GetEFBWidth(), GetEFBHeight()));
if (g_renderer->GetEFBScale() != 1)
{
// Downsample from internal resolution to 1x.
// TODO: This won't produce correct results at IRs above 2x.
g_renderer->BeginUtilityDrawing();
g_renderer->SetAndDiscardFramebuffer(m_color_copy_framebuffer.get());
g_renderer->SetViewportAndScissor(m_color_copy_framebuffer->GetRect());
g_renderer->SetPipeline(m_color_copy_pipeline.get());
g_renderer->SetTexture(0, src_texture);
g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState());
g_renderer->Draw(0, 3);
// Copy the downsampled copy texture to the staging texture.
m_color_readback_texture->CopyFromTexture(m_color_copy_texture.get(),
m_color_readback_texture->GetRect(), 0, 0,
m_color_readback_texture->GetRect());
g_renderer->EndUtilityDrawing();
}
else
{
// EFB scale is 1: copy straight from the (resolved) EFB texture to the staging texture.
m_color_readback_texture->CopyFromTexture(src_texture, m_color_readback_texture->GetRect(), 0,
0, m_color_readback_texture->GetRect());
}
// Wait until the copy is complete.
m_color_readback_texture->Flush();
m_color_readback_texture_valid = true;
return true;
}
// Copies the current EFB depth contents into the depth readback (staging) texture and marks
// that texture valid. When the EFB scale is not 1 the image is first drawn into the depth copy
// framebuffer. Always returns true.
bool FramebufferManager::PopulateDepthReadbackTexture()
{
  g_vertex_manager->OnCPUEFBAccess();

  // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on.
  AbstractTexture* src_texture =
      ResolveEFBDepthTexture(MathUtil::Rectangle<int>(0, 0, GetEFBWidth(), GetEFBHeight()));
  if (g_renderer->GetEFBScale() != 1)
  {
    // Downsample from internal resolution to 1x.
    // TODO: This won't produce correct results at IRs above 2x.
    g_renderer->BeginUtilityDrawing();
    g_renderer->SetAndDiscardFramebuffer(m_depth_copy_framebuffer.get());
    g_renderer->SetViewportAndScissor(m_depth_copy_framebuffer->GetRect());
    g_renderer->SetPipeline(m_depth_copy_pipeline.get());
    g_renderer->SetTexture(0, src_texture);

    // Depth must be point-sampled, as it is in ResolveEFBDepthTexture(). Linear filtering would
    // average depth values of unrelated surfaces into values that exist nowhere in the scene.
    // Linear filtering of depth formats is also not guaranteed to be supported by every device.
    g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
    g_renderer->Draw(0, 3);

    // No need to call FinishedRendering() here because CopyFromTexture() transitions.
    m_depth_readback_texture->CopyFromTexture(m_depth_copy_texture.get(),
                                              m_depth_readback_texture->GetRect(), 0, 0,
                                              m_depth_readback_texture->GetRect());
    g_renderer->EndUtilityDrawing();
  }
  else
  {
    // EFB scale is 1: copy straight from the (resolved) EFB depth texture.
    m_depth_readback_texture->CopyFromTexture(src_texture, m_depth_readback_texture->GetRect(), 0,
                                              0, m_depth_readback_texture->GetRect());
  }

  // Wait until the copy is complete.
  m_depth_readback_texture->Flush();
  m_depth_readback_texture_valid = true;
  return true;
}
// Marks both readback textures as stale so the next peek repopulates them.
void FramebufferManager::InvalidatePeekCache()
{
  m_depth_readback_texture_valid = false;
  m_color_readback_texture_valid = false;
}
// Builds the pipelines used for EFB readback: the color copy pipeline, the depth copy
// pipeline and, when the EFB is multisampled, the depth resolve pipeline.
// Returns false as soon as any shader or pipeline fails to be created.
bool FramebufferManager::CompileReadbackPipelines()
{
// Shared configuration: screen-quad vertex shader, texture-copy pixel shader, no culling,
// no depth testing and no blending. The geometry shader is only attached for a stereo EFB.
AbstractPipelineConfig config = {};
config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader();
config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetTexcoordGeometryShader() : nullptr;
config.pixel_shader = g_shader_cache->GetTextureCopyPixelShader();
config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
config.depth_state = RenderState::GetNoDepthTestingDepthState();
config.blending_state = RenderState::GetNoBlendingBlendState();
config.framebuffer_state = RenderState::GetColorFramebufferState(GetEFBColorFormat());
config.usage = AbstractPipelineUsage::Utility;
m_color_copy_pipeline = g_renderer->CreatePipeline(config);
if (!m_color_copy_pipeline)
return false;
// same for depth, except different format
// (depth is written to a color target whose format is derived from the EFB depth format)
config.framebuffer_state.color_texture_format =
AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat());
m_depth_copy_pipeline = g_renderer->CreatePipeline(config);
if (!m_depth_copy_pipeline)
return false;
if (IsEFBMultisampled())
{
// The depth resolve pipeline reuses the depth-copy configuration above, swapping in a
// pixel shader generated for the current EFB sample count.
auto depth_resolve_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel, FramebufferShaderGen::GenerateResolveDepthPixelShader(GetEFBSamples()));
if (!depth_resolve_shader)
return false;
// NOTE(review): depth_resolve_shader is destroyed when this scope ends; presumably the
// created pipeline does not reference the shader object afterwards - confirm per backend.
config.pixel_shader = depth_resolve_shader.get();
m_efb_depth_resolve_pipeline = g_renderer->CreatePipeline(config);
if (!m_efb_depth_resolve_pipeline)
return false;
}
return true;
}
// Releases the readback pipelines: depth resolve first, then depth copy, then color copy.
void FramebufferManager::DestroyReadbackPipelines()
{
  m_efb_depth_resolve_pipeline = nullptr;
  m_depth_copy_pipeline = nullptr;
  m_color_copy_pipeline = nullptr;
}
bool FramebufferManager::CreateReadbackFramebuffer()
{
const TextureConfig color_config(EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, GetEFBColorFormat(),
AbstractTextureFlag_RenderTarget);
const TextureConfig depth_config(
EFB_WIDTH, EFB_HEIGHT, 1, 1, 1,
AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()),
AbstractTextureFlag_RenderTarget);
if (g_renderer->GetEFBScale() != 1)
{
m_color_copy_texture = g_renderer->CreateTexture(color_config);
m_depth_copy_texture = g_renderer->CreateTexture(depth_config);
if (!m_color_copy_texture || !m_depth_copy_texture)
return false;
m_color_copy_framebuffer = g_renderer->CreateFramebuffer(m_color_copy_texture.get(), nullptr);
m_depth_copy_framebuffer = g_renderer->CreateFramebuffer(m_depth_copy_texture.get(), nullptr);
if (!m_color_copy_framebuffer || !m_depth_copy_framebuffer)
return false;
}
m_color_readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Mutable, color_config);
m_depth_readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Mutable, depth_config);
if (!m_color_readback_texture || !m_depth_readback_texture)
return false;
return true;
}
// Releases the downsample copy framebuffers/textures and invalidates both peek caches.
// Each framebuffer is released before the texture it was created from.
void FramebufferManager::DestroyReadbackFramebuffer()
{
  m_depth_copy_framebuffer = nullptr;
  m_depth_copy_texture = nullptr;

  m_color_copy_framebuffer = nullptr;
  m_color_copy_texture = nullptr;

  m_depth_readback_texture_valid = false;
  m_color_readback_texture_valid = false;
}
// Clears a rectangle of the EFB by drawing with one of the precompiled clear pipelines.
//   rc          - rectangle to clear; converted via ConvertEFBRectangle() and
//                 ConvertFramebufferRectangle() before use.
//   clear_color - selects a pipeline with color writes enabled.
//   clear_alpha - selects a pipeline with alpha writes enabled.
//   clear_z     - selects a pipeline with depth test/writes enabled.
//   color       - packed 32-bit clear color (see unpacking below).
//   z           - clear depth; only the low 24 bits are used.
void FramebufferManager::ClearEFB(const MathUtil::Rectangle<int>& rc, bool clear_color,
bool clear_alpha, bool clear_z, u32 color, u32 z)
{
// Pending pokes are drawn first, and the peek cache is invalidated since the EFB changes.
FlushEFBPokes();
InvalidatePeekCache();
g_renderer->BeginUtilityDrawing();
// Set up uniforms.
struct Uniforms
{
float clear_color[4];
float clear_depth;
float padding1, padding2, padding3;
};
static_assert(std::is_standard_layout<Uniforms>::value);
// Unpack color: bits 16-23 -> [0], bits 8-15 -> [1], bits 0-7 -> [2], bits 24-31 -> [3],
// each normalized to 0..1. Depth is the low 24 bits divided by 2^24. Padding stays zero.
Uniforms uniforms = {{static_cast<float>((color >> 16) & 0xFF) / 255.0f,
static_cast<float>((color >> 8) & 0xFF) / 255.0f,
static_cast<float>((color >> 0) & 0xFF) / 255.0f,
static_cast<float>((color >> 24) & 0xFF) / 255.0f},
static_cast<float>(z & 0xFFFFFF) / 16777216.0f};
// Backends without reversed depth range support get the inverted depth value.
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
uniforms.clear_depth = 1.0f - uniforms.clear_depth;
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
const auto target_rc = g_renderer->ConvertFramebufferRectangle(
g_renderer->ConvertEFBRectangle(rc), m_efb_framebuffer.get());
// The bool flags index the pipeline table directly (false -> 0, true -> 1).
g_renderer->SetPipeline(m_efb_clear_pipelines[clear_color][clear_alpha][clear_z].get());
g_renderer->SetViewportAndScissor(target_rc);
// Three vertices with no vertex buffer; the clear vertex shader derives positions from the
// vertex id.
g_renderer->Draw(0, 3);
g_renderer->EndUtilityDrawing();
}
bool FramebufferManager::CompileClearPipelines()
{
auto vertex_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Vertex, FramebufferShaderGen::GenerateClearVertexShader());
if (!vertex_shader)
return false;
AbstractPipelineConfig config;
config.vertex_format = nullptr;
config.vertex_shader = vertex_shader.get();
config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetColorGeometryShader() : nullptr;
config.pixel_shader = g_shader_cache->GetColorPixelShader();
config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
config.depth_state = RenderState::GetAlwaysWriteDepthState();
config.blending_state = RenderState::GetNoBlendingBlendState();
config.framebuffer_state = GetEFBFramebufferState();
config.usage = AbstractPipelineUsage::Utility;
for (u32 color_enable = 0; color_enable < 2; color_enable++)
{
config.blending_state.colorupdate = color_enable != 0;
for (u32 alpha_enable = 0; alpha_enable < 2; alpha_enable++)
{
config.blending_state.alphaupdate = alpha_enable != 0;
for (u32 depth_enable = 0; depth_enable < 2; depth_enable++)
{
config.depth_state.testenable = depth_enable != 0;
config.depth_state.updateenable = depth_enable != 0;
m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable] =
g_renderer->CreatePipeline(config);
if (!m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable])
return false;
}
}
}
return true;
}
// Releases every pipeline in the [color][alpha][depth] clear pipeline table.
void FramebufferManager::DestroyClearPipelines()
{
  for (auto& per_color : m_efb_clear_pipelines)
  {
    for (auto& per_alpha : per_color)
    {
      for (auto& clear_pipeline : per_alpha)
        clear_pipeline.reset();
    }
  }
}
// Reads the color texel at EFB coordinate (x, y) from the readback texture, repopulating the
// readback texture first if the cache is stale. Returns 0 if repopulating fails.
u32 FramebufferManager::PeekEFBColor(u32 x, u32 y)
{
  if (!m_color_readback_texture_valid)
  {
    if (!PopulateColorReadbackTexture())
      return 0;
  }

  // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL.
  const u32 texel_y = g_ActiveConfig.backend_info.bUsesLowerLeftOrigin ? EFB_HEIGHT - 1 - y : y;

  u32 texel;
  m_color_readback_texture->ReadTexel(x, texel_y, &texel);
  return texel;
}
// Reads the depth texel at EFB coordinate (x, y) from the readback texture, repopulating the
// readback texture first if the cache is stale. Returns 0.0f if repopulating fails.
float FramebufferManager::PeekEFBDepth(u32 x, u32 y)
{
  if (!m_depth_readback_texture_valid)
  {
    if (!PopulateDepthReadbackTexture())
      return 0.0f;
  }

  // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL.
  const u32 texel_y = g_ActiveConfig.backend_info.bUsesLowerLeftOrigin ? EFB_HEIGHT - 1 - y : y;

  float texel;
  m_depth_readback_texture->ReadTexel(x, texel_y, &texel);
  return texel;
}
// Queues a color write to EFB pixel (x, y). The write is batched; the batch is flushed here
// only when adding another poke could exceed MAX_POKE_VERTICES.
void FramebufferManager::PokeEFBColor(u32 x, u32 y, u32 color)
{
  // A poke adds at most six vertices, so flush first if the batch could overflow.
  const bool batch_would_overflow = (m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES;
  if (batch_would_overflow)
    FlushEFBPokes();

  CreatePokeVertices(&m_color_poke_vertices, x, y, 0.0f, color);

  // Nothing more to do unless the peek cache is valid; if it is, patch the cached texel so
  // that later peeks see the poked color without a readback.
  if (!m_color_readback_texture_valid)
    return;

  // The readback texture uses a lower-left origin on backends that report it, so flip y there.
  const u32 cache_y = g_ActiveConfig.backend_info.bUsesLowerLeftOrigin ? EFB_HEIGHT - 1 - y : y;
  m_color_readback_texture->WriteTexel(x, cache_y, &color);
}
// Queues a depth write to EFB pixel (x, y). The write is batched; the batch is flushed here
// only when adding another poke could exceed MAX_POKE_VERTICES.
void FramebufferManager::PokeEFBDepth(u32 x, u32 y, float depth)
{
  // Flush if we exceeded the number of vertices per batch.
  // This must test the depth vertex list (the one appended to below); testing the color list
  // would let the depth batch grow past MAX_POKE_VERTICES.
  if ((m_depth_poke_vertices.size() + 6) > MAX_POKE_VERTICES)
    FlushEFBPokes();

  CreatePokeVertices(&m_depth_poke_vertices, x, y, depth, 0);

  // Update the peek cache if it's valid, since we know the depth of the pixel now.
  if (m_depth_readback_texture_valid)
  {
    // The readback texture uses a lower-left origin on backends that report it, so flip y there.
    if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin)
      y = EFB_HEIGHT - 1 - y;
    m_depth_readback_texture->WriteTexel(x, y, &depth);
  }
}
// Appends the vertices for a single EFB poke at pixel (x, y) to destination_list.
//   destination_list - vertex list to append to.
//   x, y             - EFB pixel coordinates, upper-left origin.
//   z                - depth value stored in the vertex position.
//   color            - packed color stored in the vertex.
// When the backend supports large points a single point vertex is emitted, with the point
// size (EFB scale) stored in position.w. Otherwise two triangles (six vertices) covering the
// pixel are emitted.
void FramebufferManager::CreatePokeVertices(std::vector<EFBPokeVertex>* destination_list, u32 x,
                                            u32 y, float z, u32 color)
{
  // Size of one EFB pixel in clip space, which spans [-1, 1] on both axes.
  const float cs_pixel_width = 1.0f / EFB_WIDTH * 2.0f;
  const float cs_pixel_height = 1.0f / EFB_HEIGHT * 2.0f;

  if (g_ActiveConfig.backend_info.bSupportsLargePoints)
  {
    // GPU will expand the point to a quad.
    const float cs_x = (static_cast<float>(x) + 0.5f) * cs_pixel_width - 1.0f;
    const float cs_y = 1.0f - (static_cast<float>(y) + 0.5f) * cs_pixel_height;
    const float point_size = static_cast<float>(g_renderer->GetEFBScale());
    destination_list->push_back({{cs_x, cs_y, z, point_size}, color});
    return;
  }

  // Generate quad from the single point (clip-space coordinates).
  // (x1, y1) is the pixel's upper-left corner. Clip-space y decreases as the EFB row index
  // increases (y1 = 1 - y * height), so the opposite corner is one pixel height *below* y1.
  // Adding the height instead would cover the row above, and clip away pokes to row 0.
  const float x1 = static_cast<float>(x) * cs_pixel_width - 1.0f;
  const float y1 = 1.0f - static_cast<float>(y) * cs_pixel_height;
  const float x2 = x1 + cs_pixel_width;
  const float y2 = y1 - cs_pixel_height;
  destination_list->push_back({{x1, y1, z, 1.0f}, color});
  destination_list->push_back({{x2, y1, z, 1.0f}, color});
  destination_list->push_back({{x1, y2, z, 1.0f}, color});
  destination_list->push_back({{x1, y2, z, 1.0f}, color});
  destination_list->push_back({{x2, y1, z, 1.0f}, color});
  destination_list->push_back({{x2, y2, z, 1.0f}, color});
}
void FramebufferManager::FlushEFBPokes()
{
if (!m_color_poke_vertices.empty())
{
DrawPokeVertices(m_color_poke_vertices.data(), static_cast<u32>(m_color_poke_vertices.size()),
m_color_poke_pipeline.get());
m_color_poke_vertices.clear();
}
if (!m_depth_poke_vertices.empty())
{
DrawPokeVertices(m_depth_poke_vertices.data(), static_cast<u32>(m_depth_poke_vertices.size()),
m_depth_poke_pipeline.get());
m_depth_poke_vertices.clear();
}
}
void FramebufferManager::DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count,
const AbstractPipeline* pipeline)
{
// Copy to vertex buffer.
g_renderer->BeginUtilityDrawing();
u32 base_vertex, base_index;
g_vertex_manager->UploadUtilityVertices(vertices, sizeof(EFBPokeVertex),
static_cast<u32>(vertex_count), nullptr, 0, &base_vertex,
&base_index);
// Now we can draw.
g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect());
g_renderer->SetPipeline(pipeline);
g_renderer->Draw(base_vertex, vertex_count);
g_renderer->EndUtilityDrawing();
}
// Builds everything needed to draw EFB pokes: the vertex format for EFBPokeVertex, the poke
// vertex shader, and the color-poke and depth-poke pipelines.
// Returns false as soon as any object fails to be created.
bool FramebufferManager::CompilePokePipelines()
{
// Describe EFBPokeVertex: a 4-component float position followed by a 4 x unsigned byte color.
PortableVertexDeclaration vtx_decl = {};
vtx_decl.position.enable = true;
vtx_decl.position.type = VAR_FLOAT;
vtx_decl.position.components = 4;
vtx_decl.position.integer = false;
vtx_decl.position.offset = offsetof(EFBPokeVertex, position);
vtx_decl.colors[0].enable = true;
vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE;
vtx_decl.colors[0].components = 4;
vtx_decl.colors[0].integer = false;
vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color);
vtx_decl.stride = sizeof(EFBPokeVertex);
m_poke_vertex_format = g_renderer->CreateNativeVertexFormat(vtx_decl);
if (!m_poke_vertex_format)
return false;
auto poke_vertex_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Vertex, FramebufferShaderGen::GenerateEFBPokeVertexShader());
if (!poke_vertex_shader)
return false;
// Color poke pipeline: no depth testing, no blending. Points are used when the backend
// supports large points, otherwise triangles (matching what CreatePokeVertices() emits).
AbstractPipelineConfig config = {};
config.vertex_format = m_poke_vertex_format.get();
config.vertex_shader = poke_vertex_shader.get();
config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetColorGeometryShader() : nullptr;
config.pixel_shader = g_shader_cache->GetColorPixelShader();
config.rasterization_state = RenderState::GetNoCullRasterizationState(
g_ActiveConfig.backend_info.bSupportsLargePoints ? PrimitiveType::Points :
PrimitiveType::Triangles);
config.depth_state = RenderState::GetNoDepthTestingDepthState();
config.blending_state = RenderState::GetNoBlendingBlendState();
config.framebuffer_state = GetEFBFramebufferState();
config.usage = AbstractPipelineUsage::Utility;
m_color_poke_pipeline = g_renderer->CreatePipeline(config);
if (!m_color_poke_pipeline)
return false;
// Turn off color writes, depth writes on for depth pokes.
// The rest of the configuration is reused from the color poke pipeline.
config.depth_state = RenderState::GetAlwaysWriteDepthState();
config.blending_state = RenderState::GetNoColorWriteBlendState();
m_depth_poke_pipeline = g_renderer->CreatePipeline(config);
if (!m_depth_poke_pipeline)
return false;
return true;
}
// Releases the poke pipelines first, then the vertex format they were created with.
void FramebufferManager::DestroyPokePipelines()
{
  m_depth_poke_pipeline = nullptr;
  m_color_poke_pipeline = nullptr;
  m_poke_vertex_format = nullptr;
}

View File

@ -0,0 +1,171 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <type_traits>
#include <vector>

#include "Common/CommonTypes.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/TextureConfig.h"
class AbstractFramebuffer;
class AbstractPipeline;
class AbstractStagingTexture;
class NativeVertexFormat;
// Identifies an EFB pixel format reinterpretation, named <source format>To<target format>.
// The enumerator values are contiguous starting at zero.
enum class EFBReinterpretType
{
RGB8ToRGB565 = 0,
RGB8ToRGBA6 = 1,
RGBA6ToRGB8 = 2,
RGBA6ToRGB565 = 3,
RGB565ToRGB8 = 4,
RGB565ToRGBA6 = 5
};
// Number of enumerators in EFBReinterpretType; keep in sync when adding a new type.
constexpr u32 NUM_EFB_REINTERPRET_TYPES = 6;
// Returns true when the half-open ranges [aLower, aUpper) and [bLower, bUpper) share at
// least one address. Ranges that merely touch (one ends where the other begins) do not
// overlap.
inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
{
  const bool a_starts_before_b_ends = aLower < bUpper;
  const bool b_starts_before_a_ends = bLower < aUpper;
  return a_starts_before_b_ends && b_starts_before_a_ends;
}
class FramebufferManager final
{
public:
FramebufferManager();
virtual ~FramebufferManager();
// Does not require the framebuffer to be created. Slower than direct queries.
static AbstractTextureFormat GetEFBColorFormat();
static AbstractTextureFormat GetEFBDepthFormat();
static TextureConfig GetEFBColorTextureConfig();
static TextureConfig GetEFBDepthTextureConfig();
// Accessors.
AbstractTexture* GetEFBColorTexture() const { return m_efb_color_texture.get(); }
AbstractTexture* GetEFBDepthTexture() const { return m_efb_depth_texture.get(); }
AbstractFramebuffer* GetEFBFramebuffer() const { return m_efb_framebuffer.get(); }
u32 GetEFBWidth() const { return m_efb_color_texture->GetWidth(); }
u32 GetEFBHeight() const { return m_efb_color_texture->GetHeight(); }
u32 GetEFBLayers() const { return m_efb_color_texture->GetLayers(); }
u32 GetEFBSamples() const { return m_efb_color_texture->GetSamples(); }
bool IsEFBMultisampled() const { return m_efb_color_texture->IsMultisampled(); }
bool IsEFBStereo() const { return m_efb_color_texture->GetLayers() > 1; }
FramebufferState GetEFBFramebufferState() const;
// First-time setup.
bool Initialize();
// Recreate EFB framebuffers, call when the EFB size (IR) changes.
void RecreateEFBFramebuffer();
// Recompile shaders, use when MSAA mode changes.
void RecompileShaders();
// This is virtual, because D3D has both normalized and integer framebuffers.
void BindEFBFramebuffer();
// Resolve color/depth textures to a non-msaa texture, and return it.
AbstractTexture* ResolveEFBColorTexture(const MathUtil::Rectangle<int>& region);
AbstractTexture* ResolveEFBDepthTexture(const MathUtil::Rectangle<int>& region);
// Reinterpret pixel format of EFB color texture.
// Assumes no render pass is currently in progress.
// Swaps EFB framebuffers, so re-bind afterwards.
bool ReinterpretPixelData(EFBReinterpretType convtype);
// Clears the EFB using shaders.
void ClearEFB(const MathUtil::Rectangle<int>& rc, bool clear_color, bool clear_alpha,
bool clear_z, u32 color, u32 z);
// Reads a framebuffer value back from the GPU. This may block if the cache is not current.
u32 PeekEFBColor(u32 x, u32 y);
float PeekEFBDepth(u32 x, u32 y);
void InvalidatePeekCache();
// Writes a value to the framebuffer. This will never block, and writes will be batched.
void PokeEFBColor(u32 x, u32 y, u32 color);
void PokeEFBDepth(u32 x, u32 y, float depth);
void FlushEFBPokes();
protected:
struct EFBPokeVertex
{
float position[4];
u32 color;
};
static_assert(std::is_standard_layout<EFBPokeVertex>::value, "EFBPokeVertex is standard-layout");
bool CreateEFBFramebuffer();
void DestroyEFBFramebuffer();
bool CompileConversionPipelines();
void DestroyConversionPipelines();
bool CompileReadbackPipelines();
void DestroyReadbackPipelines();
bool CreateReadbackFramebuffer();
void DestroyReadbackFramebuffer();
bool CompileClearPipelines();
void DestroyClearPipelines();
bool CompilePokePipelines();
void DestroyPokePipelines();
bool PopulateColorReadbackTexture();
bool PopulateDepthReadbackTexture();
void CreatePokeVertices(std::vector<EFBPokeVertex>* destination_list, u32 x, u32 y, float z,
u32 color);
void DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count,
const AbstractPipeline* pipeline);
std::unique_ptr<AbstractTexture> m_efb_color_texture;
std::unique_ptr<AbstractTexture> m_efb_convert_color_texture;
std::unique_ptr<AbstractTexture> m_efb_depth_texture;
std::unique_ptr<AbstractTexture> m_efb_resolve_color_texture;
std::unique_ptr<AbstractTexture> m_efb_depth_resolve_texture;
std::unique_ptr<AbstractFramebuffer> m_efb_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_convert_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_depth_resolve_framebuffer;
std::unique_ptr<AbstractPipeline> m_efb_depth_resolve_pipeline;
// Format conversion shaders
std::array<std::unique_ptr<AbstractPipeline>, 6> m_format_conversion_pipelines;
// EFB readback texture
std::unique_ptr<AbstractTexture> m_color_copy_texture;
std::unique_ptr<AbstractTexture> m_depth_copy_texture;
std::unique_ptr<AbstractFramebuffer> m_color_copy_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_depth_copy_framebuffer;
std::unique_ptr<AbstractPipeline> m_color_copy_pipeline;
std::unique_ptr<AbstractPipeline> m_depth_copy_pipeline;
// CPU-side EFB readback texture
std::unique_ptr<AbstractStagingTexture> m_color_readback_texture;
std::unique_ptr<AbstractStagingTexture> m_depth_readback_texture;
bool m_color_readback_texture_valid = false;
bool m_depth_readback_texture_valid = false;
// EFB clear pipelines
// Indexed by [color_write_enabled][alpha_write_enabled][depth_write_enabled]
std::array<std::array<std::array<std::unique_ptr<AbstractPipeline>, 2>, 2>, 2>
m_efb_clear_pipelines;
// EFB poke drawing setup
std::unique_ptr<NativeVertexFormat> m_poke_vertex_format;
std::unique_ptr<AbstractPipeline> m_color_poke_pipeline;
std::unique_ptr<AbstractPipeline> m_depth_poke_pipeline;
std::vector<EFBPokeVertex> m_color_poke_vertices;
std::vector<EFBPokeVertex> m_depth_poke_vertices;
};
extern std::unique_ptr<FramebufferManager> g_framebuffer_manager;

View File

@ -1,28 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoCommon/FramebufferManagerBase.h"
#include <memory>
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/RenderBase.h"
// Global framebuffer manager instance; empty until assigned elsewhere.
std::unique_ptr<FramebufferManagerBase> g_framebuffer_manager;
// Layer count reported by GetEFBLayers(); starts at 1.
unsigned int FramebufferManagerBase::m_EFBLayers = 1;
// Defined out of line so the virtual destructor has a single definition in this file.
FramebufferManagerBase::~FramebufferManagerBase() = default;
// Selects the depth format for the EFB: D32F normally, D24_S8 on drivers flagged with
// BUG_BROKEN_D32F_CLEAR.
AbstractTextureFormat FramebufferManagerBase::GetEFBDepthFormat()
{
  // 32-bit depth clears are broken in the Adreno Vulkan driver, and have no effect.
  // To work around this, we use a D24_S8 buffer instead, which results in a loss of accuracy.
  // We still resolve this to a R32F texture, as there is no 24-bit format.
  const bool d32f_clear_is_broken = DriverDetails::HasBug(DriverDetails::BUG_BROKEN_D32F_CLEAR);
  return d32f_clear_is_broken ? AbstractTextureFormat::D24_S8 : AbstractTextureFormat::D32F;
}

View File

@ -1,30 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "Common/CommonTypes.h"
enum class AbstractTextureFormat : u32;
// Returns true when the half-open ranges [aLower, aUpper) and [bLower, bUpper) share at
// least one address. Ranges that merely touch do not overlap.
inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper)
{
  if (aLower >= bUpper)
    return false;  // a begins at or after the end of b
  if (bLower >= aUpper)
    return false;  // b begins at or after the end of a
  return true;
}
// Polymorphic base for framebuffer managers. Exposes the shared EFB layer count and the
// EFB depth format selection.
class FramebufferManagerBase
{
public:
virtual ~FramebufferManagerBase();
// Returns the current value of the static layer count.
static unsigned int GetEFBLayers() { return m_EFBLayers; }
// Returns the depth format to use for the EFB (defined in the .cpp file).
static AbstractTextureFormat GetEFBDepthFormat();
protected:
// Shared across all instances; presumably updated by derived classes - verify against users.
static unsigned int m_EFBLayers;
};
extern std::unique_ptr<FramebufferManagerBase> g_framebuffer_manager;

View File

@ -0,0 +1,464 @@
#include "VideoCommon/FramebufferShaderGen.h"
#include <sstream>
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/VertexShaderGen.h"
namespace FramebufferShaderGen
{
// Returns the API type of the active backend, as recorded in g_ActiveConfig.
static APIType GetAPIType()
{
  const auto& backend = g_ActiveConfig.backend_info;
  return backend.api_type;
}
// Writes the opening line of the uniform buffer declaration: a cbuffer at register b0 for
// D3D, otherwise a std140 UBO at binding 1. The caller emits the member block that follows.
static void EmitUniformBufferDeclaration(std::stringstream& ss)
{
  const bool is_d3d = GetAPIType() == APIType::D3D;
  ss << (is_d3d ? "cbuffer UBO : register(b0)\n" : "UBO_BINDING(std140, 1) uniform UBO\n");
}
static void EmitSamplerDeclarations(std::stringstream& ss, u32 start = 0, u32 end = 1,
bool multisampled = false)
{
switch (GetAPIType())
{
case APIType::D3D:
{
for (u32 i = start; i < end; i++)
{
ss << (multisampled ? "Texture2DMSArray<float4>" : "Texture2DArray<float4>") << " tex" << i
<< " : register(t" << i << ");\n";
ss << "SamplerState"
<< " samp" << i << " : register(s" << i << ");\n";
}
}
break;
case APIType::OpenGL:
case APIType::Vulkan:
{
for (u32 i = start; i < end; i++)
{
ss << "SAMPLER_BINDING(" << i << ") uniform "
<< (multisampled ? "sampler2DMSArray" : "sampler2DArray") << " samp" << i << ";\n";
}
}
break;
default:
break;
}
}
static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords)
{
switch (GetAPIType())
{
case APIType::D3D:
ss << "tex" << n << ".Sample(samp" << n << ", " << coords << ")";
break;
case APIType::OpenGL:
case APIType::Vulkan:
ss << "texture(samp" << n << ", " << coords << ")";
break;
default:
break;
}
}
// Writes the input/output declarations and the main() header of a vertex shader.
//   num_tex_inputs    - number of rawtexN (float3) vertex attributes.
//   num_color_inputs  - number of rawcolorN (float4) vertex attributes.
//   position_input    - whether a rawpos (float4) vertex attribute is declared.
//   num_tex_outputs   - number of v_texN (float3) outputs.
//   num_color_outputs - number of v_colN (float4) outputs.
//   extra_inputs      - text inserted verbatim: inside the parameter list for D3D (so it must
//                       end with ", "), or on its own line before main() for OpenGL/Vulkan.
// The position output is always named opos. Nothing is written for unknown API types.
static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
                                      u32 num_color_inputs, bool position_input,
                                      u32 num_tex_outputs, u32 num_color_outputs,
                                      const char* extra_inputs = "")
{
  switch (GetAPIType())
  {
  case APIType::D3D:
  {
    // D3D: everything is a parameter of main(), matched between stages by semantic.
    ss << "void main(";
    for (u32 i = 0; i < num_tex_inputs; i++)
      ss << "in float3 rawtex" << i << " : TEXCOORD" << i << ", ";
    for (u32 i = 0; i < num_color_inputs; i++)
      ss << "in float4 rawcolor" << i << " : COLOR" << i << ", ";
    if (position_input)
      ss << "in float4 rawpos : POSITION, ";
    ss << extra_inputs;
    for (u32 i = 0; i < num_tex_outputs; i++)
      ss << "out float3 v_tex" << i << " : TEXCOORD" << i << ", ";
    for (u32 i = 0; i < num_color_outputs; i++)
      ss << "out float4 v_col" << i << " : COLOR" << i << ", ";
    ss << "out float4 opos : SV_Position)\n";
  }
  break;

  case APIType::OpenGL:
  case APIType::Vulkan:
  {
    // GLSL: inputs and outputs are globals with explicit locations.
    for (u32 i = 0; i < num_tex_inputs; i++)
      ss << "ATTRIBUTE_LOCATION(" << (SHADER_TEXTURE0_ATTRIB + i) << ") in float3 rawtex" << i
         << ";\n";
    for (u32 i = 0; i < num_color_inputs; i++)
      ss << "ATTRIBUTE_LOCATION(" << (SHADER_COLOR0_ATTRIB + i) << ") in float4 rawcolor" << i
         << ";\n";
    if (position_input)
      ss << "ATTRIBUTE_LOCATION(" << SHADER_POSITION_ATTRIB << ") in float4 rawpos;\n";
    for (u32 i = 0; i < num_tex_outputs; i++)
      ss << "VARYING_LOCATION(" << i << ") out float3 v_tex" << i << ";\n";
    // Color varyings are placed directly after the texcoord *outputs*. This keeps them from
    // colliding with v_texN and matches EmitPixelMainDeclaration(), which places its color
    // inputs after its texcoord inputs (i.e. after this stage's texcoord outputs).
    for (u32 i = 0; i < num_color_outputs; i++)
      ss << "VARYING_LOCATION(" << (num_tex_outputs + i) << ") out float4 v_col" << i << ";\n";
    ss << "#define opos gl_Position\n";
    ss << extra_inputs << "\n";
    ss << "void main()\n";
  }
  break;

  default:
    break;
  }
}
// Writes the input/output declarations and the main() header of a pixel shader.
//   num_tex_inputs   - number of v_texN (float3) inputs.
//   num_color_inputs - number of v_colN (float4) inputs.
//   output_type      - type of the single output, ocol0.
//   extra_vars       - text inserted verbatim: inside the parameter list for D3D (so it must
//                      end with ", "), or on its own line before main() for OpenGL/Vulkan.
// Nothing is written for unknown API types.
static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
u32 num_color_inputs, const char* output_type = "float4",
const char* extra_vars = "")
{
switch (GetAPIType())
{
case APIType::D3D:
{
// D3D: inputs and the output are parameters of main(), matched by semantic.
ss << "void main(";
for (u32 i = 0; i < num_tex_inputs; i++)
ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", ";
for (u32 i = 0; i < num_color_inputs; i++)
ss << "in float4 v_col" << i << " : COLOR" << i << ", ";
ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n";
}
break;
case APIType::OpenGL:
case APIType::Vulkan:
{
// GLSL: texcoord inputs take locations 0..num_tex_inputs-1, color inputs follow them.
for (u32 i = 0; i < num_tex_inputs; i++)
ss << "VARYING_LOCATION(" << i << ") in float3 v_tex" << i << ";\n";
for (u32 i = 0; i < num_color_inputs; i++)
ss << "VARYING_LOCATION(" << (num_tex_inputs + i) << ") in float4 v_col" << i << ";\n";
ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n";
ss << extra_vars << "\n";
ss << "void main()\n";
}
break;
default:
break;
}
}
std::string GenerateScreenQuadVertexShader()
{
std::stringstream ss;
EmitVertexMainDeclaration(ss, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
ss << "{\n";
ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n";
ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n";
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
ss << " opos.y = -opos.y;\n";
ss << "}\n";
return ss.str();
}
// Generates a geometry shader that emits every input triangle twice, once to layer/slice 0
// and once to layer/slice 1. Colors and positions pass through unchanged; the z component
// of each texture coordinate is replaced with the layer index.
//   num_tex    - number of float3 texture coordinates passed through.
//   num_colors - number of float4 colors passed through.
// Returns an empty string for API types other than D3D, OpenGL and Vulkan.
std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
{
std::stringstream ss;
if (GetAPIType() == APIType::D3D)
{
// Input and output structures; the output adds the render target array index.
ss << "struct VS_OUTPUT\n";
ss << "{\n";
for (u32 i = 0; i < num_tex; i++)
ss << " float3 tex" << i << " : TEXCOORD" << i << ";\n";
for (u32 i = 0; i < num_colors; i++)
ss << " float4 color" << i << " : COLOR" << i << ";\n";
ss << " float4 position : SV_Position;\n";
ss << "};\n";
ss << "struct GS_OUTPUT\n";
ss << "{";
for (u32 i = 0; i < num_tex; i++)
ss << " float3 tex" << i << " : TEXCOORD" << i << ";\n";
for (u32 i = 0; i < num_colors; i++)
ss << " float4 color" << i << " : COLOR" << i << ";\n";
ss << " float4 position : SV_Position;\n";
ss << " uint slice : SV_RenderTargetArrayIndex;\n";
ss << "};\n\n";
// Two slices of three vertices each, hence maxvertexcount(6).
ss << "[maxvertexcount(6)]\n";
ss << "void main(triangle VS_OUTPUT vso[3], inout TriangleStream<GS_OUTPUT> output)\n";
ss << "{\n";
ss << " for (uint slice = 0; slice < 2u; slice++)\n";
ss << " {\n";
ss << " for (int i = 0; i < 3; i++)\n";
ss << " {\n";
ss << " GS_OUTPUT gso;\n";
ss << " gso.position = vso[i].position;\n";
// In the emitted text below, "vso[i]" refers to the shader's own loop variable; the C++
// loop variable i only selects the attribute number.
for (u32 i = 0; i < num_tex; i++)
ss << " gso.tex" << i << " = float3(vso[i].tex" << i << ".xy, float(slice));\n";
for (u32 i = 0; i < num_colors; i++)
ss << " gso.color" << i << " = vso[i].color" << i << ";\n";
ss << " gso.slice = slice;\n";
ss << " output.Append(gso);\n";
ss << " }\n";
ss << " output.RestartStrip();\n";
ss << " }\n";
ss << "}\n";
}
else if (GetAPIType() == APIType::OpenGL || GetAPIType() == APIType::Vulkan)
{
ss << "layout(triangles) in;\n";
ss << "layout(triangle_strip, max_vertices = 6) out;\n";
// Texcoords occupy locations 0..num_tex-1 and colors follow, for both inputs and outputs.
for (u32 i = 0; i < num_tex; i++)
{
ss << "layout(location = " << i << ") in float3 v_tex" << i << "[];\n";
ss << "layout(location = " << i << ") out float3 out_tex" << i << ";\n";
}
for (u32 i = 0; i < num_colors; i++)
{
ss << "layout(location = " << (num_tex + i) << ") in float4 v_col" << i << "[];\n";
ss << "layout(location = " << (num_tex + i) << ") out float4 out_col" << i << ";\n";
}
ss << "\n";
ss << "void main()\n";
ss << "{\n";
ss << " for (int j = 0; j < 2; j++)\n";
ss << " {\n";
ss << " gl_Layer = j;\n";
// We have to explicitly unroll this loop otherwise the GL compiler gets cranky.
for (u32 v = 0; v < 3; v++)
{
ss << " gl_Position = gl_in[" << v << "].gl_Position;\n";
for (u32 i = 0; i < num_tex; i++)
ss << " out_tex" << i << " = float3(v_tex" << i << "[" << v << "].xy, float(j));\n";
for (u32 i = 0; i < num_colors; i++)
ss << " out_col" << i << " = v_col" << i << "[" << v << "];\n";
ss << " EmitVertex();\n\n";
}
ss << " EndPrimitive();\n";
ss << " }\n";
ss << "}\n";
}
return ss.str();
}
// Generates a vertex shader like the screen-quad shader, except that the output texture
// coordinate is remapped into the source rectangle given by the src_offset/src_size uniforms.
std::string GenerateTextureCopyVertexShader()
{
std::stringstream ss;
// Uniform block: offset and size of the source region, applied to the 0..1 coordinates.
EmitUniformBufferDeclaration(ss);
ss << "{";
ss << " float2 src_offset;\n";
ss << " float2 src_size;\n";
ss << "};\n\n";
// No vertex attributes; the vertex id is the only input. The GLSL define needs no trailing
// newline here because EmitVertexMainDeclaration() appends one after extra_inputs.
EmitVertexMainDeclaration(ss, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID");
ss << "{\n";
// The position is computed from the unscaled coordinate before v_tex0 is remapped.
ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n";
ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n";
ss << " v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n";
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
ss << " opos.y = -opos.y;\n";
ss << "}\n";
return ss.str();
}
// Generates a pixel shader that outputs texture slot 0 sampled at v_tex0.
std::string GenerateTextureCopyPixelShader()
{
  std::stringstream ss;

  // One non-multisampled sampler and one texture coordinate input.
  EmitSamplerDeclarations(ss, 0, 1, false);
  EmitPixelMainDeclaration(ss, 1, 0);

  ss << "{\n"
     << " ocol0 = ";
  EmitSampleTexture(ss, 0, "v_tex0");
  ss << ";\n"
     << "}\n";

  return ss.str();
}
// Generates a pixel shader that outputs its interpolated color input (v_col0) unchanged.
std::string GenerateColorPixelShader()
{
  std::stringstream ss;

  // No texture coordinates, a single color input.
  EmitPixelMainDeclaration(ss, 0, 1);
  ss << "{\n"
     << " ocol0 = v_col0;\n"
     << "}\n";

  return ss.str();
}
// Generates a pixel shader that resolves a multisampled depth texture by writing the minimum
// of all `samples` samples at the current pixel. The layer is taken from v_tex0.z.
std::string GenerateResolveDepthPixelShader(u32 samples)
{
  const bool is_d3d = GetAPIType() == APIType::D3D;

  std::stringstream ss;
  EmitSamplerDeclarations(ss, 0, 1, true);
  EmitPixelMainDeclaration(ss, 1, 0, "float", is_d3d ? "in float4 ipos : SV_Position, " : "");

  ss << "{\n";
  ss << " int layer = int(v_tex0.z);\n";

  // Integer texel coordinates come from the fragment position.
  ss << (is_d3d ? " int3 coords = int3(int2(ipos.xy), layer);\n" :
                  " int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");

  // Take the minimum of all depth samples: start with sample 0, then fold in the rest.
  ss << (is_d3d ? " ocol0 = tex0.Load(coords, 0).r;\n" :
                  " ocol0 = texelFetch(samp0, coords, 0).r;\n");
  ss << " for (int i = 1; i < " << samples << "; i++)\n";
  ss << (is_d3d ? " ocol0 = min(ocol0, tex0.Load(coords, i).r);\n" :
                  " ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");

  ss << "}\n";
  return ss.str();
}
std::string GenerateClearVertexShader()
{
std::stringstream ss;
EmitUniformBufferDeclaration(ss);
ss << "{\n";
ss << " float4 clear_color;\n";
ss << " float clear_depth;\n";
ss << "};\n";
EmitVertexMainDeclaration(ss, 0, 0, false, 0, 1,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
ss << "{\n";
ss << " float2 coord = float2(float((id << 1) & 2), float(id & 2));\n";
ss << " opos = float4(coord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), clear_depth, 1.0f);\n";
ss << " v_col0 = clear_color;\n";
// NDC space is flipped in Vulkan
if (GetAPIType() == APIType::Vulkan)
ss << " opos.y = -opos.y;\n";
ss << "}\n";
return ss.str();
}
// Generates the vertex shader used for EFB pokes: passes the vertex color through and uses
// rawpos.xyz as the position. When the backend supports large points, rawpos.w is written
// to gl_PointSize.
std::string GenerateEFBPokeVertexShader()
{
  std::stringstream ss;

  // One color attribute plus the position attribute in; one color varying out.
  EmitVertexMainDeclaration(ss, 0, 1, true, 0, 1);

  ss << "{\n"
     << " v_col0 = rawcolor0;\n"
     << " opos = float4(rawpos.xyz, 1.0f);\n";

  const bool use_point_size = g_ActiveConfig.backend_info.bSupportsLargePoints;
  if (use_point_size)
    ss << " gl_PointSize = rawpos.w;\n";

  // NDC space is flipped in Vulkan.
  const bool flip_y = GetAPIType() == APIType::Vulkan;
  if (flip_y)
    ss << " opos.y = -opos.y;\n";

  ss << "}\n";
  return ss.str();
}
// Generates the pixel shader that reinterprets EFB color data from one pixel format to
// another.
//   convtype - which reinterpretation to perform.
//   samples  - EFB sample count; a multisampled source texture is declared when > 1.
// The shader loads the source texel (per-sample, averaged, or single-sampled, see below) into
// `val` and then repacks its bits according to convtype.
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples)
{
std::stringstream ss;
EmitSamplerDeclarations(ss, 0, 1, samples > 1);
// On D3D the fragment position and sample index are declared as extra inputs; the sample
// index input is declared regardless of whether the SSAA path below uses it.
EmitPixelMainDeclaration(ss, 1, 0, "float4",
GetAPIType() == APIType::D3D ?
"in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " :
"");
ss << "{\n";
ss << " int layer = int(v_tex0.z);\n";
if (GetAPIType() == APIType::D3D)
ss << " int3 coords = int3(int2(ipos.xy), layer);\n";
else
ss << " int3 coords = int3(int2(gl_FragCoord.xy), layer);\n";
if (samples == 1)
{
// No MSAA at all.
if (GetAPIType() == APIType::D3D)
ss << " float4 val = tex0.Load(int4(coords, 0));\n";
else
ss << " float4 val = texelFetch(samp0, coords, 0);\n";
}
else if (g_ActiveConfig.bSSAA)
{
// Sample shading, shader runs once per sample
// NOTE(review): these two strings have no trailing "\n", so the next statement is emitted
// on the same line. The generated code is still well-formed, just harder to read.
if (GetAPIType() == APIType::D3D)
ss << " float4 val = tex0.Load(coords, isample);";
else
ss << " float4 val = texelFetch(samp0, coords, gl_SampleID);";
}
else
{
// MSAA without sample shading, average out all samples.
ss << " float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n";
ss << " for (int i = 0; i < " << samples << "; i++)\n";
if (GetAPIType() == APIType::D3D)
ss << " val += tex0.Load(coords, i);\n";
else
ss << " val += texelFetch(samp0, coords, i);\n";
ss << " val /= float(" << samples << ");\n";
}
switch (convtype)
{
case EFBReinterpretType::RGB8ToRGBA6:
// Treat the 24 bits of R8G8B8 as one bit string and split it into four 6-bit channels.
ss << " int4 src8 = int4(round(val * 255.f));\n";
ss << " int4 dst6;\n";
ss << " dst6.r = src8.r >> 2;\n";
ss << " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n";
ss << " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n";
ss << " dst6.a = src8.b & 0x3F;\n";
ss << " ocol0 = float4(dst6) / 63.f;\n";
break;
case EFBReinterpretType::RGB8ToRGB565:
// Passed through unchanged.
ss << " ocol0 = val;\n";
break;
case EFBReinterpretType::RGBA6ToRGB8:
// Inverse of RGB8ToRGBA6: pack four 6-bit channels into three 8-bit channels; alpha is
// set to fully opaque.
ss << " int4 src6 = int4(round(val * 63.f));\n";
ss << " int4 dst8;\n";
ss << " dst8.r = (src6.r << 2) | (src6.g >> 4);\n";
ss << " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n";
ss << " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n";
ss << " dst8.a = 255;\n";
ss << " ocol0 = float4(dst8) / 255.f;\n";
break;
case EFBReinterpretType::RGBA6ToRGB565:
// Passed through unchanged.
ss << " ocol0 = val;\n";
break;
case EFBReinterpretType::RGB565ToRGB8:
// Passed through unchanged.
ss << " ocol0 = val;\n";
break;
case EFBReinterpretType::RGB565ToRGBA6:
// Passed through unchanged.
ss << " ocol0 = val;\n";
break;
}
ss << "}\n";
return ss.str();
}
} // namespace FramebufferShaderGen

View File

@ -0,0 +1,32 @@
#pragma once
#include <string>
#include "VideoCommon/VideoCommon.h"
enum class EFBReinterpretType;
namespace FramebufferShaderGen
{
// Plain bundle of the four values the constructor takes: the graphics API type, the EFB sample
// and layer counts, and an SSAA flag. All members are set once by the constructor.
struct Config
{
  APIType api_type;
  u32 efb_samples;
  u32 efb_layers;
  bool ssaa;

  Config(APIType api_type_, u32 efb_samples_, u32 efb_layers_, bool ssaa_)
      : api_type(api_type_), efb_samples(efb_samples_), efb_layers(efb_layers_), ssaa(ssaa_)
  {
  }
};
std::string GenerateScreenQuadVertexShader();
std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors);
std::string GenerateTextureCopyVertexShader();
std::string GenerateTextureCopyPixelShader();
std::string GenerateResolveDepthPixelShader(u32 samples);
std::string GenerateClearVertexShader();
std::string GenerateEFBPokeVertexShader();
std::string GenerateColorPixelShader();
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples);
} // namespace FramebufferShaderGen

View File

@ -104,17 +104,17 @@ struct hash<PortableVertexDeclaration>
// Polymorphic, move-only wrapper around a PortableVertexDeclaration.
// NOTE(review): this listing interleaves removed and added diff lines (the accessors and the
// declaration member appear twice, once as vtx_decl and once as m_decl) — confirm against the
// real header which of each pair is current.
class NativeVertexFormat
{
public:
// Stores a copy of the given declaration.
NativeVertexFormat(const PortableVertexDeclaration& vtx_decl) : m_decl(vtx_decl) {}
virtual ~NativeVertexFormat() {}
// Copying is disabled; moving is allowed.
NativeVertexFormat(const NativeVertexFormat&) = delete;
NativeVertexFormat& operator=(const NativeVertexFormat&) = delete;
NativeVertexFormat(NativeVertexFormat&&) = default;
NativeVertexFormat& operator=(NativeVertexFormat&&) = default;
u32 GetVertexStride() const { return vtx_decl.stride; }
const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; }
// Returns the stride field of the stored declaration.
u32 GetVertexStride() const { return m_decl.stride; }
// Returns the stored declaration by const reference.
const PortableVertexDeclaration& GetVertexDeclaration() const { return m_decl; }
protected:
// Let subclasses construct.
NativeVertexFormat() {}
PortableVertexDeclaration vtx_decl;
PortableVertexDeclaration m_decl;
};

View File

@ -171,8 +171,7 @@ PixelShaderUid GetPixelShaderUid()
uid_data->genMode_numindstages = bpmem.genMode.numindstages;
uid_data->genMode_numtevstages = bpmem.genMode.numtevstages;
uid_data->genMode_numtexgens = bpmem.genMode.numtexgens;
uid_data->bounding_box = g_ActiveConfig.BBoxUseFragmentShaderImplementation() &&
g_ActiveConfig.bBBoxEnable && BoundingBox::active;
uid_data->bounding_box = g_ActiveConfig.bBBoxEnable && BoundingBox::active;
uid_data->rgba6_format =
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor;
uid_data->dither = bpmem.blendmode.dither && uid_data->rgba6_format;
@ -456,10 +455,6 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg
out.Write("globallycoherent RWBuffer<int> bbox_data : register(u2);\n");
}
}
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers(out, ApiType, num_texgens, host_config, "");
out.Write("};\n");
}
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
@ -804,7 +799,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
}
else
{
if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
if (!host_config.backend_reversed_depth_range)
out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
else
out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n");
@ -818,7 +813,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
// Note: z-textures are not written to depth buffer if early depth test is used
if (uid_data->per_pixel_depth && uid_data->early_ztest)
{
if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
if (!host_config.backend_reversed_depth_range)
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
else
out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
@ -839,7 +834,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
if (uid_data->per_pixel_depth && uid_data->late_ztest)
{
if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
if (!host_config.backend_reversed_depth_range)
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
else
out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
@ -1316,7 +1311,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
if (per_pixel_depth)
{
out.Write("\t\tdepth = %s;\n",
(ApiType == APIType::D3D || ApiType == APIType::Vulkan) ? "0.0" : "1.0");
!g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : "1.0");
}
// ZCOMPLOC HACK:

View File

@ -522,9 +522,7 @@ void PixelShaderManager::SetBlendModeChanged()
void PixelShaderManager::SetBoundingBoxActive(bool active)
{
const bool enable =
active && g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation();
const bool enable = active && g_ActiveConfig.bBBoxEnable;
if (enable == (constants.bounding_box != 0))
return;

View File

@ -5,74 +5,44 @@
#include <sstream>
#include <string>
#include "Common/Assert.h"
#include "Common/CommonPaths.h"
#include "Common/CommonTypes.h"
#include "Common/FileSearch.h"
#include "Common/FileUtil.h"
#include "Common/IniFile.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/PostProcessing.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/ShaderCache.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoConfig.h"
namespace VideoCommon
{
static const char s_default_shader[] = "void main() { SetOutput(Sample()); }\n";
// Starts m_timer as soon as the object is constructed.
PostProcessingShaderImplementation::PostProcessingShaderImplementation()
{
m_timer.Start();
}
PostProcessingConfiguration::PostProcessingConfiguration() = default;
// Stops m_timer, which the constructor started.
PostProcessingShaderImplementation::~PostProcessingShaderImplementation()
{
m_timer.Stop();
}
PostProcessingConfiguration::~PostProcessingConfiguration() = default;
static std::vector<std::string> GetShaders(const std::string& sub_dir = "")
{
std::vector<std::string> paths =
Common::DoFileSearch({File::GetUserPath(D_SHADERS_IDX) + sub_dir,
File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir},
{".glsl"});
std::vector<std::string> result;
for (std::string path : paths)
{
std::string name;
SplitPath(path, nullptr, &name, nullptr);
result.push_back(name);
}
return result;
}
// Returns the shader names from the top-level shader directories for OpenGL and Vulkan, and an
// empty list for every other API type.
std::vector<std::string> PostProcessingShaderImplementation::GetShaderList(APIType api_type)
{
  // Currently there is no differentiation between API types and shader languages.
  // This could change in the future, hence the api_type parameter, but ideally,
  // shaders should be compatible across backends.
  const bool supported = api_type == APIType::OpenGL || api_type == APIType::Vulkan;
  if (!supported)
    return {};

  return GetShaders();
}
// Returns the shader names from the anaglyph sub-directory for OpenGL and Vulkan, and an empty
// list for every other API type.
std::vector<std::string> PostProcessingShaderImplementation::GetAnaglyphShaderList(APIType api_type)
{
  const bool supported = api_type == APIType::OpenGL || api_type == APIType::Vulkan;
  if (!supported)
    return {};

  return GetShaders(ANAGLYPH_DIR DIR_SEP);
}
PostProcessingShaderConfiguration::PostProcessingShaderConfiguration() = default;
PostProcessingShaderConfiguration::~PostProcessingShaderConfiguration() = default;
std::string PostProcessingShaderConfiguration::LoadShader(std::string shader)
void PostProcessingConfiguration::LoadShader(const std::string& shader)
{
// Load the shader from the configuration if there isn't one sent to us.
if (shader.empty())
shader = g_ActiveConfig.sPostProcessingShader;
m_current_shader = shader;
if (shader.empty())
{
LoadDefaultShader();
return;
}
const std::string sub_dir =
(g_Config.stereo_mode == StereoMode::Anaglyph) ? ANAGLYPH_DIR DIR_SEP : "";
@ -81,32 +51,32 @@ std::string PostProcessingShaderConfiguration::LoadShader(std::string shader)
std::string code;
std::string path = File::GetUserPath(D_SHADERS_IDX) + sub_dir + shader + ".glsl";
if (shader.empty())
if (!File::Exists(path))
{
code = s_default_shader;
// Fallback to shared user dir
path = File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir + shader + ".glsl";
}
else
{
if (!File::Exists(path))
{
// Fallback to shared user dir
path = File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir + shader + ".glsl";
}
if (!File::ReadFileToString(path, code))
{
ERROR_LOG(VIDEO, "Post-processing shader not found: %s", path.c_str());
code = s_default_shader;
}
if (!File::ReadFileToString(path, code))
{
ERROR_LOG(VIDEO, "Post-processing shader not found: %s", path.c_str());
LoadDefaultShader();
return;
}
LoadOptions(code);
LoadOptionsConfiguration();
return code;
m_current_shader_code = code;
}
void PostProcessingShaderConfiguration::LoadOptions(const std::string& code)
// Switches to the built-in default shader: the shader code becomes s_default_shader, all options
// are dropped and the dirty flag is cleared. m_current_shader is left untouched.
void PostProcessingConfiguration::LoadDefaultShader()
{
  m_current_shader_code = s_default_shader;

  // The default shader has no options, so nothing can be dirty.
  m_any_options_dirty = false;
  m_options.clear();
}
void PostProcessingConfiguration::LoadOptions(const std::string& code)
{
const std::string config_start_delimiter = "[configuration]";
const std::string config_end_delimiter = "[/configuration]";
@ -254,7 +224,7 @@ void PostProcessingShaderConfiguration::LoadOptions(const std::string& code)
}
}
void PostProcessingShaderConfiguration::LoadOptionsConfiguration()
void PostProcessingConfiguration::LoadOptionsConfiguration()
{
IniFile ini;
ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX));
@ -288,7 +258,7 @@ void PostProcessingShaderConfiguration::LoadOptionsConfiguration()
}
}
void PostProcessingShaderConfiguration::SaveOptionsConfiguration()
void PostProcessingConfiguration::SaveOptionsConfiguration()
{
IniFile ini;
ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX));
@ -331,13 +301,7 @@ void PostProcessingShaderConfiguration::SaveOptionsConfiguration()
ini.Save(File::GetUserPath(F_DOLPHINCONFIG_IDX));
}
void PostProcessingShaderConfiguration::ReloadShader()
{
m_current_shader = "";
}
void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, int index,
float value)
void PostProcessingConfiguration::SetOptionf(const std::string& option, int index, float value)
{
auto it = m_options.find(option);
@ -346,7 +310,7 @@ void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, in
m_any_options_dirty = true;
}
void PostProcessingShaderConfiguration::SetOptioni(const std::string& option, int index, s32 value)
void PostProcessingConfiguration::SetOptioni(const std::string& option, int index, s32 value)
{
auto it = m_options.find(option);
@ -355,7 +319,7 @@ void PostProcessingShaderConfiguration::SetOptioni(const std::string& option, in
m_any_options_dirty = true;
}
void PostProcessingShaderConfiguration::SetOptionb(const std::string& option, bool value)
void PostProcessingConfiguration::SetOptionb(const std::string& option, bool value)
{
auto it = m_options.find(option);
@ -363,3 +327,384 @@ void PostProcessingShaderConfiguration::SetOptionb(const std::string& option, bo
it->second.m_dirty = true;
m_any_options_dirty = true;
}
// Starts m_timer; FillUniformBuffer later reads its elapsed time for the "time" uniform.
PostProcessing::PostProcessing()
{
m_timer.Start();
}
// Stops m_timer, which the constructor started.
PostProcessing::~PostProcessing()
{
m_timer.Stop();
}
static std::vector<std::string> GetShaders(const std::string& sub_dir = "")
{
std::vector<std::string> paths =
Common::DoFileSearch({File::GetUserPath(D_SHADERS_IDX) + sub_dir,
File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir},
{".glsl"});
std::vector<std::string> result;
for (std::string path : paths)
{
std::string name;
SplitPath(path, nullptr, &name, nullptr);
result.push_back(name);
}
return result;
}
// Returns the shader names found directly in the shader directories (no sub-directory).
std::vector<std::string> PostProcessing::GetShaderList()
{
return GetShaders();
}
// Returns the shader names found in the anaglyph sub-directory of the shader directories.
std::vector<std::string> PostProcessing::GetAnaglyphShaderList()
{
return GetShaders(ANAGLYPH_DIR DIR_SEP);
}
// Records the target framebuffer format, then compiles the vertex shader, the pixel shader and
// the pipeline in that order. Stops at the first failure and returns false; returns true only
// when all three steps succeed.
bool PostProcessing::Initialize(AbstractTextureFormat format)
{
  m_framebuffer_format = format;
  return CompileVertexShader() && CompilePixelShader() && CompilePipeline();
}
// Drops the current pipeline and pixel shader and rebuilds both. If the pixel shader cannot be
// compiled the pipeline is left empty.
void PostProcessing::RecompileShader()
{
  // Release the pipeline before the shader it was built from.
  m_pipeline.reset();
  m_pixel_shader.reset();

  if (CompilePixelShader())
    CompilePipeline();
}
// Discards the current pipeline and builds a new one from the existing shaders and
// m_framebuffer_format. The result of CompilePipeline() is not checked here; m_pipeline is left
// null on failure.
void PostProcessing::RecompilePipeline()
{
m_pipeline.reset();
CompilePipeline();
}
// Draws `src` of `src_tex` (layer `src_layer`) into `dst` of the currently bound framebuffer
// using the post-processing pipeline.
// The pipeline is rebuilt first if the bound framebuffer's colour format differs from the one it
// was compiled for; if no pipeline is available the call does nothing.
void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
                                     const MathUtil::Rectangle<int>& src,
                                     const AbstractTexture* src_tex, int src_layer)
{
  auto* const target = g_renderer->GetCurrentFramebuffer();

  // Pipelines are tied to a framebuffer format, so follow the bound target.
  const auto target_format = target->GetColorFormat();
  if (target_format != m_framebuffer_format)
  {
    m_framebuffer_format = target_format;
    RecompilePipeline();
  }

  if (!m_pipeline)
    return;

  // Upload builtin + option uniforms for this draw.
  FillUniformBuffer(src, src_tex, src_layer);
  const u32 uniforms_size = static_cast<u32>(m_uniform_staging_buffer.size());
  g_vertex_manager->UploadUtilityUniforms(m_uniform_staging_buffer.data(), uniforms_size);

  g_renderer->SetViewportAndScissor(g_renderer->ConvertFramebufferRectangle(dst, target));
  g_renderer->SetPipeline(m_pipeline.get());
  g_renderer->SetTexture(0, src_tex);
  g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState());

  // Three vertices: the vertex shader generates its positions from the vertex ID.
  g_renderer->Draw(0, 3);
}
std::string PostProcessing::GetUniformBufferHeader() const
{
std::stringstream ss;
u32 unused_counter = 1;
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
ss << "cbuffer PSBlock : register(b0) {\n";
else
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
// Builtin uniforms
ss << " float4 resolution;\n";
ss << " float4 src_rect;\n";
ss << " uint time;\n";
ss << " int layer;\n";
for (u32 i = 0; i < 2; i++)
ss << " uint ubo_align_" << unused_counter++ << "_;\n";
ss << "\n";
// Custom options/uniforms
for (const auto& it : m_config.GetOptions())
{
if (it.second.m_type ==
PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_BOOL)
{
ss << StringFromFormat(" int %s;\n", it.first.c_str());
for (u32 i = 0; i < 3; i++)
ss << " int ubo_align_" << unused_counter++ << "_;\n";
}
else if (it.second.m_type ==
PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER)
{
u32 count = static_cast<u32>(it.second.m_integer_values.size());
if (count == 1)
ss << StringFromFormat(" int %s;\n", it.first.c_str());
else
ss << StringFromFormat(" int%u %s;\n", count, it.first.c_str());
for (u32 i = count; i < 4; i++)
ss << " int ubo_align_" << unused_counter++ << "_;\n";
}
else if (it.second.m_type ==
PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT)
{
u32 count = static_cast<u32>(it.second.m_float_values.size());
if (count == 1)
ss << StringFromFormat(" float %s;\n", it.first.c_str());
else
ss << StringFromFormat(" float%u %s;\n", count, it.first.c_str());
for (u32 i = count; i < 4; i++)
ss << " float ubo_align_" << unused_counter++ << "_;\n";
}
}
ss << "};\n\n";
return ss.str();
}
// Builds the source text that is prepended to the user's post-processing shader (see
// CompilePixelShader): the uniform block, the sampler/varying/output declarations for the active
// API, and the helper functions and macros (Sample, SampleLocation, SampleLayer, SampleOffset,
// GetResolution, GetInvResolution, GetCoordinates, GetTime, SetOutput, GetOption, OptionEnabled).
std::string PostProcessing::GetHeader() const
{
std::stringstream ss;
ss << GetUniformBufferHeader();
// Texture and sampler declarations differ between HLSL and GLSL.
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
{
ss << "Texture2DArray samp0 : register(t0);\n";
ss << "SamplerState samp0_ss : register(s0);\n";
}
else
{
ss << "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n";
ss << "VARYING_LOCATION(0) in float3 v_tex0;\n";
ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n";
}
// Rename main, since we need to set up globals
// (the user's main() becomes real_main(); GetFooter() supplies the actual entry point that
// copies the input/output through the static globals declared here).
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
{
ss << R"(
#define main real_main
static float3 v_tex0;
static float4 ocol0;
// Wrappers for sampling functions.
#define texture(sampler, coords) sampler.Sample(sampler##_ss, coords)
#define textureOffset(sampler, coords, offset) sampler.Sample(sampler##_ss, coords, offset)
)";
}
// Helper API shared by all backends; it relies on texture()/textureOffset() being available
// either natively or through the wrappers above.
ss << R"(
float4 Sample() { return texture(samp0, float3(v_tex0.xy, float(layer))); }
float4 SampleLocation(float2 location) { return texture(samp0, float3(location, float(layer))); }
float4 SampleLayer(int layer) { return texture(samp0, float3(v_tex0.xy, float(layer))); }
#define SampleOffset(offset) textureOffset(samp0, float3(v_tex0.xy, float(layer)), offset)
float2 GetResolution()
{
return resolution.xy;
}
float2 GetInvResolution()
{
return resolution.zw;
}
float2 GetCoordinates()
{
return v_tex0.xy;
}
uint GetTime()
{
return time;
}
void SetOutput(float4 color)
{
ocol0 = color;
}
#define GetOption(x) (x)
#define OptionEnabled(x) ((x) != 0)
)";
return ss.str();
}
// Returns the source text appended after the user's shader.
// Only D3D needs one: it defines the real entry point, which copies the interpolated input into
// the global v_tex0, runs the user's (renamed) main and writes the global ocol0 to the render
// target. Every other API gets an empty string.
std::string PostProcessing::GetFooter() const
{
  if (g_ActiveConfig.backend_info.api_type != APIType::D3D)
    return {};

  return R"(
#undef main
void main(in float3 v_tex0_ : TEXCOORD0, out float4 ocol0_ : SV_Target)
{
v_tex0 = v_tex0_;
real_main();
ocol0_ = ocol0;
})";
}
// Generates and compiles the post-processing vertex shader into m_vertex_shader.
// The shader derives its position from the vertex ID and maps the 0..1 quad coordinates into
// src_rect (offset in .xy, size in .zw) to produce the texture coordinates.
// Shows a panic alert and returns false if compilation fails.
bool PostProcessing::CompileVertexShader()
{
  const auto api_type = g_ActiveConfig.backend_info.api_type;

  std::stringstream source;
  source << GetUniformBufferHeader();

  // Entry point signature.
  if (api_type == APIType::D3D)
  {
    source << "void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
           << " out float4 opos : SV_Position) {\n";
  }
  else
  {
    source << "VARYING_LOCATION(0) out float3 v_tex0;\n"
           << "#define id gl_VertexID\n"
           << "#define opos gl_Position\n"
           << "void main() {\n";
  }

  // Body, shared by all APIs.
  source << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
         << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"
         << " v_tex0 = float3(src_rect.xy + (src_rect.zw * v_tex0.xy), 0.0f);\n";
  if (api_type == APIType::Vulkan)
    source << " opos.y = -opos.y;\n";
  source << "}\n";

  m_vertex_shader = g_renderer->CreateShaderFromSource(ShaderStage::Vertex, source.str());
  if (m_vertex_shader)
    return true;

  PanicAlert("Failed to compile post-processing vertex shader");
  return false;
}
// CPU-side image of the builtin part of the uniform block declared by GetUniformBufferHeader().
// FillUniformBuffer() memcpy's this struct to the start of the staging buffer, so the field order
// and sizes must stay in step with that declaration.
// NOTE(review): the shader side declares `uint time; int layer;` while this struct uses
// s32 time / u32 layer. The sizes match; confirm the signedness mismatch is harmless.
struct BuiltinUniforms
{
// Filled as {width, height, 1/width, 1/height} of the source texture.
float resolution[4];
// Filled as {left, top, width, height} of the source rectangle, divided by the texture size.
float src_rect[4];
// Elapsed time read from PostProcessing::m_timer.
s32 time;
// Source texture layer.
u32 layer;
// Pads the struct to a multiple of 16 bytes.
u32 padding[2];
};
// Returns the number of bytes the staging buffer needs: the builtin uniforms followed by one
// four-component slot per configuration option.
size_t PostProcessing::CalculateUniformsSize() const
{
  // Allocate a vec4 for each uniform to simplify allocation.
  const size_t bytes_per_option = sizeof(float) * 4;
  const size_t option_count = m_config.GetOptions().size();
  return sizeof(BuiltinUniforms) + option_count * bytes_per_option;
}
// Writes the current uniform values into m_uniform_staging_buffer.
//
// src:       source rectangle in texels; stored normalized by the texture size.
// src_tex:   source texture; its dimensions supply the resolution uniform.
// src_layer: layer of src_tex to sample.
//
// Layout: BuiltinUniforms first, then one zero-padded four-component slot per option, in the
// iteration order of m_config.GetOptions(). The buffer must already have been sized with
// CalculateUniformsSize() (done by CompilePixelShader()).
void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle<int>& src,
                                       const AbstractTexture* src_tex, int src_layer)
{
  using OptionType = PostProcessingConfiguration::ConfigurationOption::OptionType;

  // Number of components in one option slot.
  constexpr size_t max_components = 4;

  const float rcp_src_width = 1.0f / src_tex->GetWidth();
  const float rcp_src_height = 1.0f / src_tex->GetHeight();
  BuiltinUniforms builtin_uniforms = {
      {static_cast<float>(src_tex->GetWidth()), static_cast<float>(src_tex->GetHeight()),
       rcp_src_width, rcp_src_height},
      {static_cast<float>(src.left) * rcp_src_width, static_cast<float>(src.top) * rcp_src_height,
       static_cast<float>(src.GetWidth()) * rcp_src_width,
       static_cast<float>(src.GetHeight()) * rcp_src_height},
      static_cast<s32>(m_timer.GetTimeElapsed()),
      static_cast<u32>(src_layer),
  };

  u8* buf = m_uniform_staging_buffer.data();
  std::memcpy(buf, &builtin_uniforms, sizeof(builtin_uniforms));
  buf += sizeof(builtin_uniforms);

  for (const auto& it : m_config.GetOptions())
  {
    // Value-initialized so unused components are uploaded as zero.
    union
    {
      u32 as_bool[4];
      s32 as_int[4];
      float as_float[4];
    } value = {};

    switch (it.second.m_type)
    {
    case OptionType::OPTION_BOOL:
      value.as_bool[0] = it.second.m_bool_value ? 1 : 0;
      break;

    case OptionType::OPTION_INTEGER:
    {
      // Up to four components are valid (the shader header declares int..int4). The copy is
      // clamped as well so an oversized option can never write past the slot.
      const auto& values = it.second.m_integer_values;
      ASSERT(values.size() <= max_components);
      std::copy_n(values.begin(), std::min<size_t>(values.size(), max_components), value.as_int);
      break;
    }

    case OptionType::OPTION_FLOAT:
    {
      // Same bounds as the integer case (float..float4).
      const auto& values = it.second.m_float_values;
      ASSERT(values.size() <= max_components);
      std::copy_n(values.begin(), std::min<size_t>(values.size(), max_components),
                  value.as_float);
      break;
    }
    }

    std::memcpy(buf, &value, sizeof(value));
    buf += sizeof(value);
  }
}
bool PostProcessing::CompilePixelShader()
{
m_pipeline.reset();
m_pixel_shader.reset();
// Generate GLSL and compile the new shader.
m_config.LoadShader(g_ActiveConfig.sPostProcessingShader);
m_pixel_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel, GetHeader() + m_config.GetShaderCode() + GetFooter());
if (!m_pixel_shader)
{
PanicAlert("Failed to compile post-processing shader %s", m_config.GetShader().c_str());
// Use default shader.
m_config.LoadDefaultShader();
m_pixel_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel, GetHeader() + m_config.GetShaderCode() + GetFooter());
if (!m_pixel_shader)
return false;
}
m_uniform_staging_buffer.resize(CalculateUniformsSize());
return true;
}
// Builds m_pipeline from the current vertex/pixel shaders for m_framebuffer_format, with culling,
// depth testing and blending all disabled. A geometry shader is attached only in quad-buffer
// stereo mode. Returns whether pipeline creation succeeded.
bool PostProcessing::CompilePipeline()
{
  AbstractPipelineConfig config = {};
  config.usage = AbstractPipelineUsage::Utility;

  // Shader stages.
  config.vertex_shader = m_vertex_shader.get();
  config.pixel_shader = m_pixel_shader.get();
  if (g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer)
    config.geometry_shader = g_shader_cache->GetTexcoordGeometryShader();
  else
    config.geometry_shader = nullptr;

  // Fixed-function state.
  config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
  config.depth_state = RenderState::GetNoDepthTestingDepthState();
  config.blending_state = RenderState::GetNoBlendingBlendState();
  config.framebuffer_state = RenderState::GetColorFramebufferState(m_framebuffer_format);

  m_pipeline = g_renderer->CreatePipeline(config);
  return m_pipeline != nullptr;
}
} // namespace VideoCommon

View File

@ -10,9 +10,16 @@
#include "Common/CommonTypes.h"
#include "Common/Timer.h"
#include "VideoCommon/TextureConfig.h"
#include "VideoCommon/VideoCommon.h"
class PostProcessingShaderConfiguration
class AbstractTexture;
class AbstractPipeline;
class AbstractShader;
namespace VideoCommon
{
class PostProcessingConfiguration
{
public:
struct ConfigurationOption
@ -48,16 +55,17 @@ public:
using ConfigMap = std::map<std::string, ConfigurationOption>;
PostProcessingShaderConfiguration();
virtual ~PostProcessingShaderConfiguration();
PostProcessingConfiguration();
virtual ~PostProcessingConfiguration();
// Loads the configuration with a shader
// If the argument is "" the class will load the shader from the g_activeConfig option.
// Returns the loaded shader source from file
std::string LoadShader(std::string shader = "");
void LoadShader(const std::string& shader);
void LoadDefaultShader();
void SaveOptionsConfiguration();
void ReloadShader();
const std::string& GetShader() const { return m_current_shader; }
const std::string& GetShaderCode() const { return m_current_shader_code; }
bool IsDirty() const { return m_any_options_dirty; }
void SetDirty(bool dirty) { m_any_options_dirty = dirty; }
bool HasOptions() const { return m_options.size() > 0; }
@ -72,26 +80,53 @@ public:
private:
bool m_any_options_dirty = false;
std::string m_current_shader;
std::string m_current_shader_code;
ConfigMap m_options;
void LoadOptions(const std::string& code);
void LoadOptionsConfiguration();
};
class PostProcessingShaderImplementation
// Owns the post-processing shaders, pipeline and uniform staging buffer, and performs the
// post-processing blit (BlitFromTexture).
// NOTE(review): this listing interleaves removed and added diff lines (old
// PostProcessingShaderImplementation declarations, the APIType overloads and a second m_config
// member) — confirm against the real header which lines are current.
class PostProcessing
{
public:
PostProcessingShaderImplementation();
virtual ~PostProcessingShaderImplementation();
PostProcessing();
virtual ~PostProcessing();
static std::vector<std::string> GetShaderList(APIType api_type);
static std::vector<std::string> GetAnaglyphShaderList(APIType api_type);
// Names of the available shaders / anaglyph shaders found on disk.
static std::vector<std::string> GetShaderList();
static std::vector<std::string> GetAnaglyphShaderList();
PostProcessingShaderConfiguration* GetConfig() { return &m_config; }
// Returns a pointer to the owned configuration object.
PostProcessingConfiguration* GetConfig() { return &m_config; }
// Compiles shaders and pipeline for the given framebuffer format; false on failure.
bool Initialize(AbstractTextureFormat format);
// Rebuilds the pixel shader and then the pipeline.
void RecompileShader();
// Rebuilds only the pipeline.
void RecompilePipeline();
// Draws src of src_tex (layer src_layer) into dst of the current framebuffer.
void BlitFromTexture(const MathUtil::Rectangle<int>& dst, const MathUtil::Rectangle<int>& src,
const AbstractTexture* src_tex, int src_layer);
protected:
// Shader source generation helpers.
std::string GetUniformBufferHeader() const;
std::string GetHeader() const;
std::string GetFooter() const;
bool CompileVertexShader();
bool CompilePixelShader();
bool CompilePipeline();
// Size and contents of the uniform staging buffer.
size_t CalculateUniformsSize() const;
void FillUniformBuffer(const MathUtil::Rectangle<int>& src, const AbstractTexture* src_tex,
int src_layer);
// Timer for determining our time value
Common::Timer m_timer;
PostProcessingConfiguration m_config;
PostProcessingShaderConfiguration m_config;
std::unique_ptr<AbstractShader> m_vertex_shader;
std::unique_ptr<AbstractShader> m_pixel_shader;
std::unique_ptr<AbstractPipeline> m_pipeline;
// Colour format the pipeline was last built for.
AbstractTextureFormat m_framebuffer_format = AbstractTextureFormat::Undefined;
// CPU-side copy of the uniform block, rewritten before each blit.
std::vector<u8> m_uniform_staging_buffer;
};
} // namespace VideoCommon

View File

@ -50,14 +50,15 @@
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/BPFunctions.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/FPSCounter.h"
#include "VideoCommon/FramebufferManagerBase.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/PostProcessing.h"
#include "VideoCommon/ShaderCache.h"
@ -68,12 +69,10 @@
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
// TODO: Move these out of here.
int frameCount;
std::unique_ptr<Renderer> g_renderer;
static float AspectToWidescreen(float aspect)
@ -97,7 +96,14 @@ Renderer::~Renderer() = default;
// Initializes ImGui, then creates the post-processor and initializes it for the backbuffer
// format. Returns false as soon as either step fails.
// NOTE(review): this listing is a rendered diff; the bare `return InitializeImGui();` appears to
// be the removed line that the statements after it replace — confirm against the real file.
bool Renderer::Initialize()
{
return InitializeImGui();
if (!InitializeImGui())
return false;
m_post_processor = std::make_unique<VideoCommon::PostProcessing>();
if (!m_post_processor->Initialize(m_backbuffer_format))
return false;
return true;
}
void Renderer::Shutdown()
@ -106,6 +112,142 @@ void Renderer::Shutdown()
// can require additional graphics sub-systems so it needs to be done first
ShutdownFrameDumping();
ShutdownImGui();
m_post_processor.reset();
}
// Flushes the vertex manager; paired with EndUtilityDrawing() (see ScaleTexture for a caller).
void Renderer::BeginUtilityDrawing()
{
g_vertex_manager->Flush();
}
// Rebinds the EFB framebuffer and reapplies the scissor and viewport from BP state.
void Renderer::EndUtilityDrawing()
{
// Reset framebuffer/scissor/viewport. Pipeline will be reset at next draw.
g_framebuffer_manager->BindEFBFramebuffer();
BPFunctions::SetScissor();
BPFunctions::SetViewport();
}
// Base implementation: only records `framebuffer` as the current framebuffer.
void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer)
{
m_current_framebuffer = framebuffer;
}
// Base implementation: only records `framebuffer` as the current framebuffer; no discard is
// performed here (presumably left to backend overrides — verify).
void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer)
{
m_current_framebuffer = framebuffer;
}
// Base implementation: only records `framebuffer` as the current framebuffer. color_value and
// depth_value are not used here (the clear itself is presumably left to backend overrides —
// verify).
void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer,
const ClearColor& color_value, float depth_value)
{
m_current_framebuffer = framebuffer;
}
// Convenience overload: forwards the string's character data and length to the
// (stage, const char*, size) overload.
std::unique_ptr<AbstractShader> Renderer::CreateShaderFromSource(ShaderStage stage,
const std::string& source)
{
return CreateShaderFromSource(stage, source.c_str(), source.size());
}
// Forwards the clear request unchanged to the framebuffer manager's ClearEFB().
void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z)
{
g_framebuffer_manager->ClearEFB(rc, colorEnable, alphaEnable, zEnable, color, z);
}
// Forwards the reinterpretation request unchanged to the framebuffer manager.
void Renderer::ReinterpretPixelData(EFBReinterpretType convtype)
{
g_framebuffer_manager->ReinterpretPixelData(convtype);
}
// Reads one EFB pixel at (x, y).
// type == PeekColor returns the colour, converted for the current EFB pixel format and the alpha
// read mode. Any other type is treated as PeekZ and returns the depth as a 16-bit or 24-bit
// integer depending on the pixel format. poke_data is not used.
u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
{
  if (type != EFBAccessType::PeekColor)
  {
    // EFBAccessType::PeekZ
    // Depth buffer is inverted for improved precision near far plane
    float depth = g_framebuffer_manager->PeekEFBDepth(x, y);
    if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
      depth = 1.0f - depth;

    // if Z is in 16 bit format you must return a 16 bit integer
    if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
      return MathUtil::Clamp<u32>(static_cast<u32>(depth * 65536.0f), 0, 0xFFFF);

    return MathUtil::Clamp<u32>(static_cast<u32>(depth * 16777216.0f), 0, 0xFFFFFF);
  }

  const u32 peeked = g_framebuffer_manager->PeekEFBColor(x, y);

  // a little-endian value is expected to be returned (swap the first and third bytes)
  u32 color = (peeked & 0xFF00FF00) | ((peeked >> 16) & 0xFF) | ((peeked << 16) & 0xFF0000);

  // check what to do with the alpha channel (GX_PokeAlphaRead)
  const PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode();

  if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24)
  {
    color = RGBA8ToRGBA6ToRGBA8(color);
  }
  else
  {
    if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
      color = RGBA8ToRGB565ToRGBA8(color);

    // Only the RGBA6 format keeps its alpha; every other format reads as opaque.
    color |= 0xFF000000;
  }

  if (alpha_read_mode.ReadMode == 2)
    return color;  // GX_READ_NONE

  if (alpha_read_mode.ReadMode == 1)
    return color | 0xFF000000;  // GX_READ_FF

  return color & 0x00FFFFFF;  // GX_READ_00 (ReadMode == 0)
}
// Writes `num_points` values into the EFB.
// type == PokeColor writes colours. Any other type is treated as PokeZ and writes depths, taken
// from the low 24 bits of each point's data.
void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points)
{
  const EfbPokeData* const end = points + num_points;

  if (type == EFBAccessType::PokeColor)
  {
    for (const EfbPokeData* point = points; point != end; ++point)
    {
      // Convert to expected format (BGRA->RGBA)
      // TODO: Check alpha, depending on mode?
      const u32 color = (point->data & 0xFF00FF00) | ((point->data >> 16) & 0xFF) |
                        ((point->data << 16) & 0xFF0000);
      g_framebuffer_manager->PokeEFBColor(point->x, point->y, color);
    }
    return;
  }

  // EFBAccessType::PokeZ
  const bool invert_depth = !g_ActiveConfig.backend_info.bSupportsReversedDepthRange;
  for (const EfbPokeData* point = points; point != end; ++point)
  {
    // Convert to floating-point depth.
    float depth = float(point->data & 0xFFFFFF) / 16777216.0f;
    if (invert_depth)
      depth = 1.0f - depth;
    g_framebuffer_manager->PokeEFBDepth(point->x, point->y, depth);
  }
}
void Renderer::RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight,
@ -169,6 +311,8 @@ bool Renderer::CalculateTargetSize()
int new_efb_width = 0;
int new_efb_height = 0;
std::tie(new_efb_width, new_efb_height) = CalculateTargetScale(EFB_WIDTH, EFB_HEIGHT);
new_efb_width = std::max(new_efb_width, 1);
new_efb_height = std::max(new_efb_height, 1);
if (new_efb_width != m_target_width || new_efb_height != m_target_height)
{
@ -251,6 +395,11 @@ void Renderer::CheckForConfigChanges()
// Update texture cache settings with any changed options.
g_texture_cache->OnConfigChanged(g_ActiveConfig);
// Check for post-processing shader changes. Done up here as it doesn't affect anything outside
// the post-processor. Note that options are applied every frame, so no need to check those.
if (m_post_processor->GetConfig()->GetShader() != g_ActiveConfig.sPostProcessingShader)
m_post_processor->RecompileShader();
// Determine which (if any) settings have changed.
ShaderHostConfig new_host_config = ShaderHostConfig::GetCurrent();
u32 changed_bits = 0;
@ -278,13 +427,30 @@ void Renderer::CheckForConfigChanges()
// Notify the backend of the changes, if any.
OnConfigChanged(changed_bits);
// Framebuffer changed?
if (changed_bits & (CONFIG_CHANGE_BIT_MULTISAMPLES | CONFIG_CHANGE_BIT_STEREO_MODE |
CONFIG_CHANGE_BIT_TARGET_SIZE))
{
g_framebuffer_manager->RecreateEFBFramebuffer();
}
// Reload shaders if host config has changed.
if (changed_bits & (CONFIG_CHANGE_BIT_HOST_CONFIG | CONFIG_CHANGE_BIT_MULTISAMPLES))
{
OSD::AddMessage("Video config changed, reloading shaders.", OSD::Duration::NORMAL);
WaitForGPUIdle();
SetPipeline(nullptr);
g_vertex_manager->InvalidatePipelineObject();
g_shader_cache->SetHostConfig(new_host_config, g_ActiveConfig.iMultisamples);
g_shader_cache->SetHostConfig(new_host_config);
g_shader_cache->Reload();
g_framebuffer_manager->RecompileShaders();
}
// Viewport and scissor rect have to be reset since they will be scaled differently.
if (changed_bits & CONFIG_CHANGE_BIT_TARGET_SIZE)
{
BPFunctions::SetViewport();
BPFunctions::SetScissor();
}
}
@ -389,6 +555,86 @@ void Renderer::ResizeSurface()
m_surface_resized.Set();
}
// Applies one rectangle as both the viewport and the scissor rectangle. The viewport receives the
// rectangle's origin and size converted to floats, together with the supplied depth range.
void Renderer::SetViewportAndScissor(const MathUtil::Rectangle<int>& rect, float min_depth,
                                     float max_depth)
{
  const float viewport_x = static_cast<float>(rect.left);
  const float viewport_y = static_cast<float>(rect.top);
  const float viewport_width = static_cast<float>(rect.GetWidth());
  const float viewport_height = static_cast<float>(rect.GetHeight());

  SetViewport(viewport_x, viewport_y, viewport_width, viewport_height, min_depth, max_depth);
  SetScissorRect(rect);
}
// Draws the src_rect region of src_texture into the dst_rect region of dst_framebuffer using the
// RGBA8 copy pipeline and a linear sampler.
//   dst_framebuffer: render target; its color format is asserted to be RGBA8.
//   dst_rect:        destination rectangle, converted with the framebuffer's dimensions before use.
//   src_texture:     texture bound to sampler slot 0.
//   src_rect:        source rectangle, converted with the texture's dimensions before use.
void Renderer::ScaleTexture(AbstractFramebuffer* dst_framebuffer,
const MathUtil::Rectangle<int>& dst_rect,
const AbstractTexture* src_texture,
const MathUtil::Rectangle<int>& src_rect)
{
ASSERT(dst_framebuffer->GetColorFormat() == AbstractTextureFormat::RGBA8);
BeginUtilityDrawing();
// The shader needs to know the source rectangle.
// It is uploaded as four floats: left, top, width and height, each multiplied by the reciprocal
// of the corresponding source texture dimension.
const auto converted_src_rect = g_renderer->ConvertFramebufferRectangle(
src_rect, src_texture->GetWidth(), src_texture->GetHeight());
const float rcp_src_width = 1.0f / src_texture->GetWidth();
const float rcp_src_height = 1.0f / src_texture->GetHeight();
const std::array<float, 4> uniforms = {{converted_src_rect.left * rcp_src_width,
converted_src_rect.top * rcp_src_height,
converted_src_rect.GetWidth() * rcp_src_width,
converted_src_rect.GetHeight() * rcp_src_height}};
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
// Discard if we're overwriting the whole thing.
// "Whole thing" is decided purely by comparing the size of dst_rect with the framebuffer size.
if (static_cast<u32>(dst_rect.GetWidth()) == dst_framebuffer->GetWidth() &&
static_cast<u32>(dst_rect.GetHeight()) == dst_framebuffer->GetHeight())
{
SetAndDiscardFramebuffer(dst_framebuffer);
}
else
{
SetFramebuffer(dst_framebuffer);
}
SetViewportAndScissor(ConvertFramebufferRectangle(dst_rect, dst_framebuffer));
// Framebuffers with more than one layer use the stereo variant of the copy pipeline.
SetPipeline(dst_framebuffer->GetLayers() > 1 ? g_shader_cache->GetRGBA8StereoCopyPipeline() :
g_shader_cache->GetRGBA8CopyPipeline());
SetTexture(0, src_texture);
SetSamplerState(0, RenderState::GetLinearSamplerState());
// Three vertices starting at vertex 0.
// NOTE(review): presumably a screen-covering triangle generated by the copy vertex shader - confirm.
Draw(0, 3);
EndUtilityDrawing();
// Notify the color attachment, if the framebuffer has one, that rendering to it has finished.
if (dst_framebuffer->GetColorAttachment())
dst_framebuffer->GetColorAttachment()->FinishedRendering();
}
// Convenience overload that takes the framebuffer dimensions from the framebuffer object and
// forwards to the width/height overload.
MathUtil::Rectangle<int>
Renderer::ConvertFramebufferRectangle(const MathUtil::Rectangle<int>& rect,
                                      const AbstractFramebuffer* framebuffer)
{
  const u32 fb_width = framebuffer->GetWidth();
  const u32 fb_height = framebuffer->GetHeight();
  return ConvertFramebufferRectangle(rect, fb_width, fb_height);
}
// Returns rect unchanged on backends that do not use a lower-left origin. On backends that do,
// the rectangle is flipped vertically within a framebuffer of height fb_height. fb_width is
// not used by the conversion, and no clamping is performed.
MathUtil::Rectangle<int> Renderer::ConvertFramebufferRectangle(const MathUtil::Rectangle<int>& rect,
                                                               u32 fb_width, u32 fb_height)
{
  if (!g_ActiveConfig.backend_info.bUsesLowerLeftOrigin)
    return rect;

  // Flip vertically: the old bottom edge becomes the new top edge and vice versa.
  MathUtil::Rectangle<int> flipped = rect;
  flipped.top = fb_height - rect.bottom;
  flipped.bottom = fb_height - rect.top;
  return flipped;
}
// Converts an EFB rectangle to a target rectangle by passing each edge through the matching
// EFBToScaledX / EFBToScaledY helper.
TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc)
{
  const auto scaled_left = EFBToScaledX(rc.left);
  const auto scaled_top = EFBToScaledY(rc.top);
  const auto scaled_right = EFBToScaledX(rc.right);
  const auto scaled_bottom = EFBToScaledY(rc.bottom);

  TargetRectangle scaled;
  scaled.left = scaled_left;
  scaled.top = scaled_top;
  scaled.right = scaled_right;
  scaled.bottom = scaled_bottom;
  return scaled;
}
std::tuple<float, float> Renderer::ScaleToDisplayAspectRatio(const int width,
const int height) const
{
@ -700,7 +946,7 @@ bool Renderer::InitializeImGui()
vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false};
vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false};
vdecl.stride = sizeof(ImDrawVert);
m_imgui_vertex_format = g_vertex_manager->CreateNativeVertexFormat(vdecl);
m_imgui_vertex_format = CreateNativeVertexFormat(vdecl);
if (!m_imgui_vertex_format)
{
PanicAlert("Failed to create imgui vertex format");
@ -723,10 +969,10 @@ bool Renderer::InitializeImGui()
pconfig.vertex_format = m_imgui_vertex_format.get();
pconfig.vertex_shader = vertex_shader.get();
pconfig.pixel_shader = pixel_shader.get();
pconfig.rasterization_state.hex = RenderState::GetNoCullRasterizationState().hex;
pconfig.rasterization_state.primitive = PrimitiveType::Triangles;
pconfig.depth_state.hex = RenderState::GetNoDepthTestingDepthStencilState().hex;
pconfig.blending_state.hex = RenderState::GetNoBlendingBlendState().hex;
pconfig.rasterization_state =
RenderState::GetCullBackFaceRasterizationState(PrimitiveType::Triangles);
pconfig.depth_state = RenderState::GetNoDepthTestingDepthState();
pconfig.blending_state = RenderState::GetNoBlendingBlendState();
pconfig.blending_state.blendenable = true;
pconfig.blending_state.srcfactor = BlendMode::SRCALPHA;
pconfig.blending_state.dstfactor = BlendMode::INVSRCALPHA;
@ -752,7 +998,7 @@ bool Renderer::InitializeImGui()
io.Fonts->GetTexDataAsRGBA32(&font_tex_pixels, &font_tex_width, &font_tex_height);
TextureConfig font_tex_config(font_tex_width, font_tex_height, 1, 1, 1,
AbstractTextureFormat::RGBA8, false);
AbstractTextureFormat::RGBA8, 0);
std::unique_ptr<AbstractTexture> font_tex = CreateTexture(font_tex_config);
if (!font_tex)
{
@ -799,10 +1045,8 @@ void Renderer::BeginImGuiFrame()
ImGui::NewFrame();
}
void Renderer::RenderImGui()
void Renderer::DrawImGui()
{
ImGui::Render();
ImDrawData* draw_data = ImGui::GetDrawData();
if (!draw_data)
return;
@ -842,9 +1086,11 @@ void Renderer::RenderImGui()
continue;
}
SetScissorRect(MathUtil::Rectangle<int>(
static_cast<int>(cmd.ClipRect.x), static_cast<int>(cmd.ClipRect.y),
static_cast<int>(cmd.ClipRect.z), static_cast<int>(cmd.ClipRect.w)));
SetScissorRect(ConvertFramebufferRectangle(
MathUtil::Rectangle<int>(
static_cast<int>(cmd.ClipRect.x), static_cast<int>(cmd.ClipRect.y),
static_cast<int>(cmd.ClipRect.z), static_cast<int>(cmd.ClipRect.w)),
m_current_framebuffer));
SetTexture(0, reinterpret_cast<const AbstractTexture*>(cmd.TextureId));
DrawIndexed(base_index, cmd.ElemCount, base_vertex);
base_index += cmd.ElemCount;
@ -859,7 +1105,10 @@ std::unique_lock<std::mutex> Renderer::GetImGuiLock()
void Renderer::BeginUIFrame()
{
ResetAPIState();
if (IsHeadless())
return;
BeginUtilityDrawing();
BindBackbuffer({0.0f, 0.0f, 0.0f, 1.0f});
}
@ -867,16 +1116,19 @@ void Renderer::EndUIFrame()
{
{
auto lock = GetImGuiLock();
RenderImGui();
ImGui::Render();
}
if (!IsHeadless())
{
DrawImGui();
std::lock_guard<std::mutex> guard(m_swap_mutex);
PresentBackbuffer();
EndUtilityDrawing();
}
BeginImGuiFrame();
RestoreAPIState();
}
void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
@ -946,32 +1198,34 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
// with the loader, and it has not been unmapped yet. Force a pipeline flush to avoid this.
g_vertex_manager->Flush();
// Render the XFB to the screen.
ResetAPIState();
BindBackbuffer({0.0f, 0.0f, 0.0f, 1.0f});
UpdateDrawRectangle();
RenderXFBToScreen(xfb_entry->texture.get(), xfb_rect);
// Hold the imgui lock while we're presenting.
// It's only to prevent races on inputs anyway, at this point.
// Render any UI elements to the draw list.
{
auto lock = GetImGuiLock();
DrawDebugText();
OSD::DrawMessages();
RenderImGui();
ImGui::Render();
}
// Present to the window system.
// Render the XFB to the screen.
BeginUtilityDrawing();
if (!IsHeadless())
{
std::lock_guard<std::mutex> guard(m_swap_mutex);
PresentBackbuffer();
}
BindBackbuffer({{0.0f, 0.0f, 0.0f, 1.0f}});
UpdateDrawRectangle();
RenderXFBToScreen(xfb_entry->texture.get(), xfb_rect);
DrawImGui();
// Update the window size based on the frame that was just rendered.
// Due to depending on guest state, we need to call this every frame.
SetWindowSize(texture_config.width, texture_config.height);
// Present to the window system.
{
std::lock_guard<std::mutex> guard(m_swap_mutex);
PresentBackbuffer();
}
// Update the window size based on the frame that was just rendered.
// Due to depending on guest state, we need to call this every frame.
SetWindowSize(texture_config.width, texture_config.height);
}
m_fps_counter.Update();
@ -984,12 +1238,11 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
if (IsFrameDumping())
DumpCurrentFrame();
frameCount++;
GFX_DEBUGGER_PAUSE_AT(NEXT_FRAME, true);
// Begin new frame
m_frame_count++;
stats.ResetFrame();
g_shader_cache->RetrieveAsyncShaders();
g_vertex_manager->OnEndFrame();
BeginImGuiFrame();
// We invalidate the pipeline object at the start of the frame.
@ -1003,13 +1256,13 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
g_texture_cache->FlushEFBCopies();
// Remove stale EFB/XFB copies.
g_texture_cache->Cleanup(frameCount);
g_texture_cache->Cleanup(m_frame_count);
// Handle any config changes, this gets propagated to the backend.
CheckForConfigChanges();
g_Config.iSaveTargetId = 0;
RestoreAPIState();
EndUtilityDrawing();
Core::Callback_VideoCopiedToXFB(true);
}
@ -1028,6 +1281,24 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
}
}
// Blits the region rc of the XFB texture to the current target rectangle via the post-processor.
// In side-by-side and top-and-bottom stereo modes the target rectangle is split in two and each
// half is blitted separately, with 0 and 1 as the final BlitFromTexture argument.
// NOTE(review): that final argument is presumably the source texture layer - confirm against
// PostProcessing::BlitFromTexture.
void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc)
{
  const auto target_rc = GetTargetRectangle();
  const bool split_screen_stereo = g_ActiveConfig.stereo_mode == StereoMode::SBS ||
                                   g_ActiveConfig.stereo_mode == StereoMode::TAB;
  if (!split_screen_stereo)
  {
    m_post_processor->BlitFromTexture(target_rc, rc, texture, 0);
    return;
  }

  TargetRectangle left_rc, right_rc;
  std::tie(left_rc, right_rc) = ConvertStereoRectangle(target_rc);
  m_post_processor->BlitFromTexture(left_rc, rc, texture, 0);
  m_post_processor->BlitFromTexture(right_rc, rc, texture, 1);
}
bool Renderer::IsFrameDumping()
{
if (m_screenshot_request.IsSet())
@ -1040,15 +1311,6 @@ bool Renderer::IsFrameDumping()
}
void Renderer::DumpCurrentFrame()
{
// Scale/render to frame dump texture.
RenderFrameDump();
// Queue a readback for the next frame.
QueueFrameDumpReadback();
}
void Renderer::RenderFrameDump()
{
int target_width, target_height;
if (!g_ActiveConfig.bInternalResolutionFrameDumps && !IsHeadless())
@ -1063,50 +1325,76 @@ void Renderer::RenderFrameDump()
m_last_xfb_texture->GetConfig().width, m_last_xfb_texture->GetConfig().height);
}
// Ensure framebuffer exists (we lazily allocate it in case frame dumping isn't used).
// Or, resize texture if it isn't large enough to accommodate the current frame.
if (!m_frame_dump_render_texture ||
m_frame_dump_render_texture->GetConfig().width != static_cast<u32>(target_width) ||
m_frame_dump_render_texture->GetConfig().height != static_cast<u32>(target_height))
// We only need to render a copy if we need to stretch/scale the XFB copy.
const AbstractTexture* source_tex = m_last_xfb_texture;
MathUtil::Rectangle<int> source_rect = m_last_xfb_region;
if (source_rect.GetWidth() != target_width || source_rect.GetHeight() != target_height)
{
// Recreate texture objects. Release before creating so we don't temporarily use twice the RAM.
TextureConfig config(target_width, target_height, 1, 1, 1, AbstractTextureFormat::RGBA8, true);
m_frame_dump_render_texture.reset();
m_frame_dump_render_texture = CreateTexture(config);
ASSERT(m_frame_dump_render_texture);
if (!CheckFrameDumpRenderTexture(target_width, target_height))
return;
source_tex = m_frame_dump_render_texture.get();
source_rect = MathUtil::Rectangle<int>(0, 0, target_width, target_height);
ScaleTexture(m_frame_dump_render_framebuffer.get(), source_rect, m_last_xfb_texture,
m_last_xfb_region);
}
// Scaling is likely to occur here, but if possible, do a bit-for-bit copy.
if (m_last_xfb_region.GetWidth() != target_width ||
m_last_xfb_region.GetHeight() != target_height)
{
m_frame_dump_render_texture->ScaleRectangleFromTexture(
m_last_xfb_texture, m_last_xfb_region, EFBRectangle{0, 0, target_width, target_height});
}
else
{
m_frame_dump_render_texture->CopyRectangleFromTexture(
m_last_xfb_texture, m_last_xfb_region, 0, 0,
EFBRectangle{0, 0, target_width, target_height}, 0, 0);
}
}
void Renderer::QueueFrameDumpReadback()
{
// Index 0 was just sent to AVI dump. Swap with the second texture.
if (m_frame_dump_readback_textures[0])
std::swap(m_frame_dump_readback_textures[0], m_frame_dump_readback_textures[1]);
std::unique_ptr<AbstractStagingTexture>& rbtex = m_frame_dump_readback_textures[0];
if (!rbtex || rbtex->GetConfig() != m_frame_dump_render_texture->GetConfig())
{
rbtex = CreateStagingTexture(StagingTextureType::Readback,
m_frame_dump_render_texture->GetConfig());
}
if (!CheckFrameDumpReadbackTexture(target_width, target_height))
return;
const auto converted_region =
ConvertFramebufferRectangle(source_rect, source_tex->GetWidth(), source_tex->GetHeight());
m_frame_dump_readback_textures[0]->CopyFromTexture(
source_tex, converted_region, 0, 0,
MathUtil::Rectangle<int>(0, 0, target_width, target_height));
m_last_frame_state = AVIDump::FetchState(m_last_xfb_ticks);
m_last_frame_exported = true;
rbtex->CopyFromTexture(m_frame_dump_render_texture.get(), 0, 0);
}
// Ensures the frame dump render texture and its framebuffer exist and match the requested size.
// Both objects are allocated lazily (frame dumping may never be used) and are recreated
// whenever the requested size differs from the current one.
//   target_width / target_height: required size of the render texture, in pixels.
// Returns true when a texture of the requested size and a framebuffer wrapping it are ready.
// Returns false, after showing a panic alert, when either object could not be created; in that
// case neither object is kept, so the next call retries the allocation from scratch.
bool Renderer::CheckFrameDumpRenderTexture(u32 target_width, u32 target_height)
{
  // Only reuse the existing objects when BOTH are present. Checking the texture alone would
  // report success after a failed framebuffer creation and hand callers a null framebuffer.
  if (m_frame_dump_render_texture && m_frame_dump_render_framebuffer &&
      m_frame_dump_render_texture->GetWidth() == target_width &&
      m_frame_dump_render_texture->GetHeight() == target_height)
  {
    return true;
  }

  // Recreate texture, but release before creating so we don't temporarily use twice the RAM.
  m_frame_dump_render_framebuffer.reset();
  m_frame_dump_render_texture.reset();
  m_frame_dump_render_texture =
      CreateTexture(TextureConfig(target_width, target_height, 1, 1, 1,
                                  AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget));
  if (!m_frame_dump_render_texture)
  {
    PanicAlert("Failed to allocate frame dump render texture");
    return false;
  }

  m_frame_dump_render_framebuffer = CreateFramebuffer(m_frame_dump_render_texture.get(), nullptr);
  if (!m_frame_dump_render_framebuffer)
  {
    // A texture without a framebuffer cannot be rendered to, so do not keep it around.
    m_frame_dump_render_texture.reset();
    PanicAlert("Failed to create frame dump render framebuffer");
    return false;
  }

  return true;
}
// Ensures the first frame dump readback (staging) texture exists and matches the requested size,
// recreating it as an RGBA8 readback texture when it is missing or has a different size.
// Returns true when a usable texture is available, false when creation failed.
bool Renderer::CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height)
{
  std::unique_ptr<AbstractStagingTexture>& readback = m_frame_dump_readback_textures[0];

  const bool needs_recreate = !readback || readback->GetWidth() != target_width ||
                              readback->GetHeight() != target_height;
  if (needs_recreate)
  {
    // Release the old texture first, then allocate its replacement.
    readback.reset();
    readback = CreateStagingTexture(
        StagingTextureType::Readback,
        TextureConfig(target_width, target_height, 1, 1, 1, AbstractTextureFormat::RGBA8, 0));
  }

  return readback != nullptr;
}
void Renderer::FlushFrameDump()
@ -1151,6 +1439,7 @@ void Renderer::ShutdownFrameDumping()
m_frame_dump_start.Set();
if (m_frame_dump_thread.joinable())
m_frame_dump_thread.join();
m_frame_dump_render_framebuffer.reset();
m_frame_dump_render_texture.reset();
for (auto& tex : m_frame_dump_readback_textures)
tex.reset();

View File

@ -41,22 +41,26 @@ class AbstractShader;
class AbstractTexture;
class AbstractStagingTexture;
class NativeVertexFormat;
class PostProcessingShaderImplementation;
struct TextureConfig;
struct ComputePipelineConfig;
struct AbstractPipelineConfig;
struct PortableVertexDeclaration;
enum class ShaderStage;
enum class EFBAccessType;
enum class EFBReinterpretType;
enum class StagingTextureType;
namespace VideoCommon
{
class PostProcessing;
}
// One EFB poke request: a pixel position plus the raw value to write at that position.
struct EfbPokeData
{
  // Pixel coordinates of the poke.
  u16 x;
  u16 y;
  // Raw value to write; how it is interpreted depends on the poke type given to PokeEFB.
  u32 data;
};
extern int frameCount;
// Renderer really isn't a very good name for this class - it's more like "Misc".
// The long term goal is to get rid of this class and replace it with others that make
// more sense.
@ -78,37 +82,38 @@ public:
virtual void SetScissorRect(const MathUtil::Rectangle<int>& rc) {}
virtual void SetTexture(u32 index, const AbstractTexture* texture) {}
virtual void SetSamplerState(u32 index, const SamplerState& state) {}
virtual void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) {}
virtual void UnbindTexture(const AbstractTexture* texture) {}
virtual void SetInterlacingMode() {}
virtual void SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth)
{
}
virtual void SetFullscreen(bool enable_fullscreen) {}
virtual bool IsFullscreen() const { return false; }
virtual void ApplyState() {}
virtual void RestoreState() {}
virtual void ResetAPIState() {}
virtual void RestoreAPIState() {}
virtual void BeginUtilityDrawing();
virtual void EndUtilityDrawing();
virtual std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) = 0;
virtual std::unique_ptr<AbstractStagingTexture>
CreateStagingTexture(StagingTextureType type, const TextureConfig& config) = 0;
virtual std::unique_ptr<AbstractFramebuffer>
CreateFramebuffer(const AbstractTexture* color_attachment,
const AbstractTexture* depth_attachment) = 0;
CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) = 0;
// Framebuffer operations.
virtual void SetFramebuffer(const AbstractFramebuffer* framebuffer) {}
virtual void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) {}
virtual void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
const ClearColor& color_value = {}, float depth_value = 0.0f)
{
}
virtual void SetFramebuffer(AbstractFramebuffer* framebuffer);
virtual void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer);
virtual void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer,
const ClearColor& color_value = {}, float depth_value = 0.0f);
// Drawing with currently-bound pipeline state.
virtual void Draw(u32 base_vertex, u32 num_vertices) {}
virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {}
// Dispatching compute shaders with currently-bound state.
virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
{
}
// Binds the backbuffer for rendering. The buffer will be cleared immediately after binding.
// This is where any window size changes are detected, therefore m_backbuffer_width and/or
// m_backbuffer_height may change after this function returns.
@ -122,12 +127,15 @@ public:
CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) = 0;
virtual std::unique_ptr<AbstractShader>
CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) = 0;
virtual std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0;
virtual std::unique_ptr<AbstractPipeline>
CreatePipeline(const AbstractPipelineConfig& config) = 0;
std::unique_ptr<AbstractShader> CreateShaderFromSource(ShaderStage stage,
const std::string& source);
AbstractFramebuffer* GetCurrentFramebuffer() const { return m_current_framebuffer; }
const AbstractFramebuffer* GetCurrentFramebuffer() const { return m_current_framebuffer; }
u32 GetCurrentFramebufferWidth() const { return m_current_framebuffer_width; }
u32 GetCurrentFramebufferHeight() const { return m_current_framebuffer_height; }
// Ideal internal resolution - multiple of the native EFB resolution
int GetTargetWidth() const { return m_target_width; }
int GetTargetHeight() const { return m_target_height; }
@ -137,10 +145,27 @@ public:
float GetBackbufferScale() const { return m_backbuffer_scale; }
void SetWindowSize(int width, int height);
// EFB coordinate conversion functions
// Sets viewport and scissor to the specified rectangle. rect is assumed to be in framebuffer
// coordinates, i.e. lower-left origin in OpenGL.
void SetViewportAndScissor(const MathUtil::Rectangle<int>& rect, float min_depth = 0.0f,
float max_depth = 1.0f);
// Scales a GPU texture using a copy shader.
virtual void ScaleTexture(AbstractFramebuffer* dst_framebuffer,
const MathUtil::Rectangle<int>& dst_rect,
const AbstractTexture* src_texture,
const MathUtil::Rectangle<int>& src_rect);
// Converts a rectangle from an upper-left origin to a lower-left origin if required by the
// backend. The rectangle is not clamped to the framebuffer size.
MathUtil::Rectangle<int> ConvertFramebufferRectangle(const MathUtil::Rectangle<int>& rect,
u32 fb_width, u32 fb_height);
MathUtil::Rectangle<int> ConvertFramebufferRectangle(const MathUtil::Rectangle<int>& rect,
const AbstractFramebuffer* framebuffer);
// EFB coordinate conversion functions
// Use this to convert a whole native EFB rect to backbuffer coordinates
virtual TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) = 0;
TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc);
const TargetRectangle& GetTargetRectangle() const { return m_target_rectangle; }
float CalculateDrawAspectRatio() const;
@ -170,18 +195,20 @@ public:
bool InitializeImGui();
virtual void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z) = 0;
virtual void ReinterpretPixelData(unsigned int convtype) = 0;
u32 color, u32 z);
virtual void ReinterpretPixelData(EFBReinterpretType convtype);
void RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight,
float Gamma = 1.0f);
virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0;
virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) = 0;
virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data);
virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points);
virtual u16 BBoxRead(int index) = 0;
virtual void BBoxWrite(int index, u16 value) = 0;
virtual void BBoxFlush() {}
virtual void Flush() {}
virtual void WaitForGPUIdle() {}
// Finish up the current frame, print some stats
void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
@ -189,14 +216,14 @@ public:
// Draws the specified XFB buffer to the screen, performing any post-processing.
// Assumes that the backbuffer has already been bound and cleared.
virtual void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) {}
virtual void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc);
// Called when the configuration changes, and backend structures need to be updated.
virtual void OnConfigChanged(u32 bits) {}
PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; }
void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; }
PostProcessingShaderImplementation* GetPostProcessor() const { return m_post_processor.get(); }
VideoCommon::PostProcessing* GetPostProcessor() const { return m_post_processor.get(); }
// Final surface changing
// This is called when the surface is resized (WX) or the window changes (Android).
void ChangeSurface(void* new_surface_handle);
@ -246,12 +273,10 @@ protected:
// Renders ImGui windows to the currently-bound framebuffer.
// Should be called with the ImGui lock held.
void RenderImGui();
void DrawImGui();
// TODO: Remove the width/height parameters once we make the EFB an abstract framebuffer.
const AbstractFramebuffer* m_current_framebuffer = nullptr;
u32 m_current_framebuffer_width = 1;
u32 m_current_framebuffer_height = 1;
AbstractFramebuffer* m_current_framebuffer = nullptr;
const AbstractPipeline* m_current_pipeline = nullptr;
Common::Flag m_screenshot_request;
Common::Event m_screenshot_completed;
@ -260,8 +285,8 @@ protected:
bool m_aspect_wide = false;
// The framebuffer size
int m_target_width = 0;
int m_target_height = 0;
int m_target_width = 1;
int m_target_height = 1;
// Backbuffer (window) size and render area
int m_backbuffer_width = 0;
@ -269,10 +294,11 @@ protected:
float m_backbuffer_scale = 1.0f;
AbstractTextureFormat m_backbuffer_format = AbstractTextureFormat::Undefined;
TargetRectangle m_target_rectangle = {};
int m_frame_count = 0;
FPSCounter m_fps_counter;
std::unique_ptr<PostProcessingShaderImplementation> m_post_processor;
std::unique_ptr<VideoCommon::PostProcessing> m_post_processor;
void* m_new_surface_handle = nullptr;
Common::Flag m_surface_changed;
@ -315,6 +341,7 @@ private:
// Texture used for screenshot/frame dumping
std::unique_ptr<AbstractTexture> m_frame_dump_render_texture;
std::unique_ptr<AbstractFramebuffer> m_frame_dump_render_framebuffer;
std::array<std::unique_ptr<AbstractStagingTexture>, 2> m_frame_dump_readback_textures;
AVIDump::Frame m_last_frame_state;
bool m_last_frame_exported = false;
@ -340,15 +367,15 @@ private:
bool IsFrameDumping();
// Asynchronously encodes the current staging texture to the frame dump.
// Checks that the frame dump render texture exists and is the correct size.
bool CheckFrameDumpRenderTexture(u32 target_width, u32 target_height);
// Checks that the frame dump readback texture exists and is the correct size.
bool CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height);
// Fills the frame dump staging texture with the current XFB texture.
void DumpCurrentFrame();
// Fills the frame dump render texture with the current XFB texture.
void RenderFrameDump();
// Queues the current frame for readback, which will be written to AVI next frame.
void QueueFrameDumpReadback();
// Asynchronously encodes the specified pointer of frame data to the frame dump.
void DumpFrameData(const u8* data, int w, int h, int stride, const AVIDump::Frame& state);

View File

@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/TextureConfig.h"
void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_type)
{
@ -23,6 +24,12 @@ RasterizationState& RasterizationState::operator=(const RasterizationState& rhs)
return *this;
}
// Copy-assigns by copying the packed `hex` value of the right-hand side.
FramebufferState& FramebufferState::operator=(const FramebufferState& rhs)
{
  this->hex = rhs.hex;
  return *this;
}
void DepthState::Generate(const BPMemory& bp)
{
testenable = bp.zmode.testenable.Value();
@ -206,10 +213,19 @@ RasterizationState GetInvalidRasterizationState()
return state;
}
RasterizationState GetNoCullRasterizationState()
// Builds a zero-initialized rasterization state for the given primitive type with the cull mode
// set to CULL_NONE.
RasterizationState GetNoCullRasterizationState(PrimitiveType primitive)
{
  RasterizationState no_cull = {};
  no_cull.primitive = primitive;
  no_cull.cullmode = GenMode::CULL_NONE;
  return no_cull;
}
// Builds a zero-initialized rasterization state for the given primitive type with the cull mode
// set to CULL_BACK.
RasterizationState GetCullBackFaceRasterizationState(PrimitiveType primitive)
{
  RasterizationState cull_back = {};
  cull_back.primitive = primitive;
  cull_back.cullmode = GenMode::CULL_BACK;
  return cull_back;
}
@ -220,7 +236,7 @@ DepthState GetInvalidDepthState()
return state;
}
DepthState GetNoDepthTestingDepthStencilState()
DepthState GetNoDepthTestingDepthState()
{
DepthState state = {};
state.testenable = false;
@ -229,6 +245,15 @@ DepthState GetNoDepthTestingDepthStencilState()
return state;
}
// Builds a depth state with the depth test enabled, depth updates enabled and the compare
// function set to ALWAYS.
DepthState GetAlwaysWriteDepthState()
{
  DepthState always_write = {};
  always_write.func = ZMode::ALWAYS;
  always_write.testenable = true;
  always_write.updateenable = true;
  return always_write;
}
BlendingState GetInvalidBlendingState()
{
BlendingState state;
@ -251,6 +276,21 @@ BlendingState GetNoBlendingBlendState()
return state;
}
// Builds a blending state with both color and alpha updates turned off. Blending, dual-source
// blending and logic ops are disabled, and the factors are set to ONE (source) / ZERO (dest).
BlendingState GetNoColorWriteBlendState()
{
  BlendingState no_write = {};

  // Feature toggles: everything off.
  no_write.usedualsrc = false;
  no_write.blendenable = false;
  no_write.logicopenable = false;

  // Blend factors: source ONE, destination ZERO, for both color and alpha.
  no_write.srcfactor = BlendMode::ONE;
  no_write.dstfactor = BlendMode::ZERO;
  no_write.srcfactoralpha = BlendMode::ONE;
  no_write.dstfactoralpha = BlendMode::ZERO;

  // Write masks: neither color nor alpha is updated.
  no_write.colorupdate = false;
  no_write.alphaupdate = false;

  return no_write;
}
SamplerState GetInvalidSamplerState()
{
SamplerState state;
@ -287,4 +327,20 @@ SamplerState GetLinearSamplerState()
state.anisotropic_filtering = false;
return state;
}
// Builds a framebuffer state for a color-only target of the given format: the depth format is
// Undefined, the sample count is 1 and per-sample shading is off.
FramebufferState GetColorFramebufferState(AbstractTextureFormat format)
{
  FramebufferState color_only = {};

  // Attachment formats.
  color_only.color_texture_format = format;
  color_only.depth_texture_format = AbstractTextureFormat::Undefined;

  // Sampling configuration.
  color_only.samples = 1;
  color_only.per_sample_shading = false;

  return color_only;
}
// Shorthand for GetColorFramebufferState with the RGBA8 color format.
FramebufferState GetRGBA8FramebufferState()
{
  const AbstractTextureFormat color_format = AbstractTextureFormat::RGBA8;
  return GetColorFramebufferState(color_format);
}
} // namespace RenderState

View File

@ -9,6 +9,8 @@
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/BPStructs.h"
enum class AbstractTextureFormat : u32;
enum class PrimitiveType : u32
{
Points,
@ -32,6 +34,20 @@ union RasterizationState
u32 hex;
};
// Packed description of a framebuffer configuration. Every field is a member of the same union
// as the 32-bit `hex` value, and comparison and assignment operate on `hex` as a whole.
// NOTE(review): the BitField template arguments are presumably (bit offset, bit width, type) -
// confirm against the BitField definition.
union FramebufferState
{
// Format of the color attachment.
BitField<0, 8, AbstractTextureFormat> color_texture_format;
// Format of the depth attachment.
BitField<8, 8, AbstractTextureFormat> depth_texture_format;
// Sample count.
BitField<16, 8, u32> samples;
// Flag for per-sample shading.
BitField<24, 1, u32> per_sample_shading;
// Two states are equal exactly when their packed values are equal.
bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; }
bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; }
FramebufferState& operator=(const FramebufferState& rhs);
// Raw packed value of the whole state.
u32 hex;
};
union DepthState
{
void Generate(const BPMemory& bp);
@ -114,12 +130,17 @@ union SamplerState
namespace RenderState
{
RasterizationState GetInvalidRasterizationState();
RasterizationState GetNoCullRasterizationState();
RasterizationState GetNoCullRasterizationState(PrimitiveType primitive);
RasterizationState GetCullBackFaceRasterizationState(PrimitiveType primitive);
DepthState GetInvalidDepthState();
DepthState GetNoDepthTestingDepthStencilState();
DepthState GetNoDepthTestingDepthState();
DepthState GetAlwaysWriteDepthState();
BlendingState GetInvalidBlendingState();
BlendingState GetNoBlendingBlendState();
BlendingState GetNoColorWriteBlendState();
SamplerState GetInvalidSamplerState();
SamplerState GetPointSamplerState();
SamplerState GetLinearSamplerState();
}
FramebufferState GetColorFramebufferState(AbstractTextureFormat format);
FramebufferState GetRGBA8FramebufferState();
} // namespace RenderState

View File

@ -9,7 +9,8 @@
#include "Common/MsgHandler.h"
#include "Core/ConfigManager.h"
#include "VideoCommon/FramebufferManagerBase.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/FramebufferShaderGen.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
@ -22,17 +23,26 @@ std::unique_ptr<VideoCommon::ShaderCache> g_shader_cache;
namespace VideoCommon
{
ShaderCache::ShaderCache() = default;
ShaderCache::~ShaderCache() = default;
// Clears the shader caches and then the pipeline caches when the cache object is destroyed.
ShaderCache::~ShaderCache()
{
ClearShaderCaches();
ClearPipelineCaches();
}
bool ShaderCache::Initialize()
{
m_api_type = g_ActiveConfig.backend_info.api_type;
m_host_config = ShaderHostConfig::GetCurrent();
m_efb_depth_format = FramebufferManagerBase::GetEFBDepthFormat();
m_efb_multisamples = g_ActiveConfig.iMultisamples;
// Create the async compiler, and start the worker threads.
if (!CompileSharedPipelines())
return false;
m_async_shader_compiler = g_renderer->CreateAsyncShaderCompiler();
return true;
}
void ShaderCache::InitializeShaderCache()
{
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads());
// Load shader and UID caches.
@ -53,17 +63,6 @@ bool ShaderCache::Initialize()
// Switch to the runtime shader compiler thread configuration.
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
return true;
}
void ShaderCache::SetHostConfig(const ShaderHostConfig& host_config, u32 efb_multisamples)
{
if (m_host_config.bits == host_config.bits && m_efb_multisamples == efb_multisamples)
return;
m_host_config = host_config;
m_efb_multisamples = efb_multisamples;
Reload();
}
void ShaderCache::Reload()
@ -99,8 +98,6 @@ void ShaderCache::Shutdown()
// until everything has finished compiling.
m_async_shader_compiler->StopWorkerThreads();
ClosePipelineUIDCache();
ClearShaderCaches();
ClearPipelineCaches();
}
const AbstractPipeline* ShaderCache::GetPipelineForUid(const GXPipelineUid& uid)
@ -445,6 +442,11 @@ bool ShaderCache::NeedsGeometryShader(const GeometryShaderUid& uid) const
return m_host_config.backend_geometry_shaders && !uid.GetUidData()->IsPassthrough();
}
// Returns true only when the host config reports both backend geometry shader support and
// stereo being enabled.
bool ShaderCache::UseGeometryShaderForEFBCopies() const
{
  if (!m_host_config.backend_geometry_shaders)
    return false;

  return m_host_config.stereo;
}
AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader,
const AbstractShader* geometry_shader, const AbstractShader* pixel_shader,
@ -460,10 +462,7 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
config.rasterization_state = rasterization_state;
config.depth_state = depth_state;
config.blending_state = blending_state;
config.framebuffer_state.color_texture_format = AbstractTextureFormat::RGBA8;
config.framebuffer_state.depth_texture_format = m_efb_depth_format;
config.framebuffer_state.per_sample_shading = m_host_config.ssaa;
config.framebuffer_state.samples = m_efb_multisamples;
config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState();
return config;
}
@ -967,8 +966,9 @@ void ShaderCache::QueueUberShaderPipelines()
config.vs_uid = vs_uid;
config.gs_uid = gs_uid;
config.ps_uid = ps_uid;
config.rasterization_state = RenderState::GetNoCullRasterizationState();
config.depth_state = RenderState::GetNoDepthTestingDepthStencilState();
config.rasterization_state =
RenderState::GetCullBackFaceRasterizationState(PrimitiveType::TriangleStrip);
config.depth_state = RenderState::GetNoDepthTestingDepthState();
config.blending_state = RenderState::GetNoBlendingBlendState();
auto iter = m_gx_uber_pipeline_cache.find(config);
@ -998,24 +998,172 @@ void ShaderCache::QueueUberShaderPipelines()
});
}
std::string ShaderCache::GetUtilityShaderHeader() const
const AbstractPipeline*
ShaderCache::GetEFBCopyToVRAMPipeline(const TextureConversionShaderGen::TCShaderUid& uid)
{
std::stringstream ss;
auto iter = m_efb_copy_to_vram_pipelines.find(uid);
if (iter != m_efb_copy_to_vram_pipelines.end())
return iter->second.get();
ss << "#define API_D3D " << (m_api_type == APIType::D3D ? 1 : 0) << "\n";
ss << "#define API_OPENGL " << (m_api_type == APIType::OpenGL ? 1 : 0) << "\n";
ss << "#define API_VULKAN " << (m_api_type == APIType::Vulkan ? 1 : 0) << "\n";
if (m_efb_multisamples > 1)
auto shader_code = TextureConversionShaderGen::GeneratePixelShader(m_api_type, uid.GetUidData());
auto shader = g_renderer->CreateShaderFromSource(ShaderStage::Pixel, shader_code.GetBuffer());
if (!shader)
{
ss << "#define MSAA_ENABLED 1" << std::endl;
ss << "#define MSAA_SAMPLES " << m_efb_multisamples << std::endl;
if (m_host_config.ssaa)
ss << "#define SSAA_ENABLED 1" << std::endl;
m_efb_copy_to_vram_pipelines.emplace(uid, nullptr);
return nullptr;
}
ss << "#define EFB_LAYERS " << (m_host_config.stereo ? 2 : 1) << std::endl;
return ss.str();
AbstractPipelineConfig config = {};
config.vertex_format = nullptr;
config.vertex_shader = m_efb_copy_vertex_shader.get();
config.geometry_shader =
UseGeometryShaderForEFBCopies() ? m_texcoord_geometry_shader.get() : nullptr;
config.pixel_shader = shader.get();
config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
config.depth_state = RenderState::GetNoDepthTestingDepthState();
config.blending_state = RenderState::GetNoBlendingBlendState();
config.framebuffer_state = RenderState::GetRGBA8FramebufferState();
config.usage = AbstractPipelineUsage::Utility;
auto iiter = m_efb_copy_to_vram_pipelines.emplace(uid, g_renderer->CreatePipeline(config));
return iiter.first->second.get();
}
// Returns the pipeline that encodes an EFB copy to RAM for the given parameters, creating and
// caching it on first use. A failed pixel shader compile is cached as a null entry, and the
// result of CreatePipeline() is cached as-is, so repeated requests for the same uid do not
// recompile. May return nullptr.
const AbstractPipeline* ShaderCache::GetEFBCopyToRAMPipeline(const EFBCopyParams& uid)
{
  const auto cached = m_efb_copy_to_ram_pipelines.find(uid);
  if (cached != m_efb_copy_to_ram_pipelines.end())
    return cached->second.get();

  const auto encoder_source = TextureConversionShaderTiled::GenerateEncodingShader(uid, m_api_type);
  const auto encoder_shader = g_renderer->CreateShaderFromSource(
      ShaderStage::Pixel, encoder_source, std::strlen(encoder_source));
  if (!encoder_shader)
  {
    m_efb_copy_to_ram_pipelines.emplace(uid, nullptr);
    return nullptr;
  }

  // Value-initialized, so the vertex format and geometry shader start out null.
  AbstractPipelineConfig config = {};
  config.vertex_shader = m_screen_quad_vertex_shader.get();
  if (UseGeometryShaderForEFBCopies())
    config.geometry_shader = m_texcoord_geometry_shader.get();
  config.pixel_shader = encoder_shader.get();
  config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
  config.depth_state = RenderState::GetNoDepthTestingDepthState();
  config.blending_state = RenderState::GetNoBlendingBlendState();
  config.framebuffer_state = RenderState::GetColorFramebufferState(AbstractTextureFormat::BGRA8);
  config.usage = AbstractPipelineUsage::Utility;

  const auto inserted =
      m_efb_copy_to_ram_pipelines.emplace(uid, g_renderer->CreatePipeline(config));
  return inserted.first->second.get();
}
// Compiles the shaders and pipelines shared by utility draws: the screen quad, texture copy and
// EFB copy vertex shaders, the passthrough geometry shaders (only when geometry shaders are used
// for EFB copies), the texture copy/color pixel shaders, the RGBA8 copy pipelines and, when the
// host config reports backend_palette_conversion, one palette conversion pipeline per TLUT
// format. Returns false as soon as any shader or pipeline fails to be created; objects created
// before the failure are left assigned.
bool ShaderCache::CompileSharedPipelines()
{
  m_screen_quad_vertex_shader = g_renderer->CreateShaderFromSource(
      ShaderStage::Vertex, FramebufferShaderGen::GenerateScreenQuadVertexShader());
  m_texture_copy_vertex_shader = g_renderer->CreateShaderFromSource(
      ShaderStage::Vertex, FramebufferShaderGen::GenerateTextureCopyVertexShader());
  m_efb_copy_vertex_shader = g_renderer->CreateShaderFromSource(
      ShaderStage::Vertex,
      TextureConversionShaderGen::GenerateVertexShader(m_api_type).GetBuffer());
  if (!m_screen_quad_vertex_shader || !m_texture_copy_vertex_shader || !m_efb_copy_vertex_shader)
    return false;

  if (UseGeometryShaderForEFBCopies())
  {
    m_texcoord_geometry_shader = g_renderer->CreateShaderFromSource(
        ShaderStage::Geometry, FramebufferShaderGen::GeneratePassthroughGeometryShader(1, 0));
    m_color_geometry_shader = g_renderer->CreateShaderFromSource(
        ShaderStage::Geometry, FramebufferShaderGen::GeneratePassthroughGeometryShader(0, 1));
    if (!m_texcoord_geometry_shader || !m_color_geometry_shader)
      return false;
  }

  m_texture_copy_pixel_shader = g_renderer->CreateShaderFromSource(
      ShaderStage::Pixel, FramebufferShaderGen::GenerateTextureCopyPixelShader());
  m_color_pixel_shader = g_renderer->CreateShaderFromSource(
      ShaderStage::Pixel, FramebufferShaderGen::GenerateColorPixelShader());
  if (!m_texture_copy_pixel_shader || !m_color_pixel_shader)
    return false;

  // Value-initialize the config, matching the EFB copy pipeline builders, so no member is ever
  // left indeterminate if a field is not assigned below.
  AbstractPipelineConfig config = {};
  config.vertex_format = nullptr;
  config.vertex_shader = m_texture_copy_vertex_shader.get();
  config.geometry_shader = nullptr;
  config.pixel_shader = m_texture_copy_pixel_shader.get();
  config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
  config.depth_state = RenderState::GetNoDepthTestingDepthState();
  config.blending_state = RenderState::GetNoBlendingBlendState();
  config.framebuffer_state = RenderState::GetRGBA8FramebufferState();
  config.usage = AbstractPipelineUsage::Utility;
  m_copy_rgba8_pipeline = g_renderer->CreatePipeline(config);
  if (!m_copy_rgba8_pipeline)
    return false;

  // The stereo variant is the same pipeline with the texcoord passthrough geometry shader.
  if (UseGeometryShaderForEFBCopies())
  {
    config.geometry_shader = m_texcoord_geometry_shader.get();
    m_rgba8_stereo_copy_pipeline = g_renderer->CreatePipeline(config);
    if (!m_rgba8_stereo_copy_pipeline)
      return false;
  }

  if (m_host_config.backend_palette_conversion)
  {
    // Palette conversion reuses the config above, swapping in the screen quad vertex shader and
    // a per-format pixel shader.
    config.vertex_shader = m_screen_quad_vertex_shader.get();
    config.geometry_shader = nullptr;

    for (size_t i = 0; i < NUM_PALETTE_CONVERSION_SHADERS; i++)
    {
      // The loop index is used directly as the TLUTFormat value.
      // NOTE(review): the pixel shader object is destroyed at the end of each iteration;
      // presumably the created pipeline does not need it afterwards - confirm per backend.
      auto shader = g_renderer->CreateShaderFromSource(
          ShaderStage::Pixel, TextureConversionShaderTiled::GeneratePaletteConversionShader(
                                  static_cast<TLUTFormat>(i), m_api_type));
      if (!shader)
        return false;

      config.pixel_shader = shader.get();
      m_palette_conversion_pipelines[i] = g_renderer->CreatePipeline(config);
      if (!m_palette_conversion_pipelines[i])
        return false;
    }
  }

  return true;
}
// Returns the palette conversion pipeline for the given TLUT format, or nullptr when none is
// available. CompileSharedPipelines() only creates these pipelines when the host config reports
// backend_palette_conversion, so a null result is already possible for valid formats.
const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat format)
{
  const auto index = static_cast<size_t>(format);
  ASSERT(index < NUM_PALETTE_CONVERSION_SHADERS);

  // std::array::operator[] is unchecked, so never index past the end even if the assertion
  // above does not stop execution.
  if (index >= m_palette_conversion_pipelines.size())
    return nullptr;

  return m_palette_conversion_pipelines[index].get();
}
// Returns the compute shader that decodes the given texture/palette format pair, generating and
// caching it on first use. If no shader source is generated for the pair, or compilation fails,
// a null entry is cached and nullptr is returned, so the failure is not retried.
const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format,
                                                            TLUTFormat palette_format)
{
  const auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
  const auto cached = m_texture_decoding_shaders.find(key);
  if (cached != m_texture_decoding_shaders.end())
    return cached->second.get();

  // NOTE(review): the source is always generated for APIType::OpenGL rather than m_api_type -
  // confirm this is intentional.
  const std::string shader_source =
      TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::OpenGL);

  // Stays null when there is no source to compile or the compile fails.
  std::unique_ptr<AbstractShader> shader;
  if (!shader_source.empty())
    shader = g_renderer->CreateShaderFromSource(ShaderStage::Compute, shader_source);

  const auto inserted = m_texture_decoding_shaders.emplace(key, std::move(shader));
  return inserted.first->second.get();
}
} // namespace VideoCommon

View File

@ -25,12 +25,16 @@
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureConverterShaderGen.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"
class NativeVertexFormat;
enum class AbstractTextureFormat : u32;
enum class TLUTFormat;
namespace VideoCommon
{
@ -44,8 +48,11 @@ public:
bool Initialize();
void Shutdown();
// Changes the shader host config. Shaders will be reloaded if there are changes.
void SetHostConfig(const ShaderHostConfig& host_config, u32 efb_multisamples);
// Compiles/loads cached shaders.
void InitializeShaderCache();
// Changes the shader host config. Shaders should be reloaded afterwards.
void SetHostConfig(const ShaderHostConfig& host_config) { m_host_config = host_config; }
// Reloads/recreates all shaders and pipelines.
void Reload();
@ -53,9 +60,6 @@ public:
// Retrieves all pending shaders/pipelines from the async compiler.
void RetrieveAsyncShaders();
// Get utility shader header based on current config.
std::string GetUtilityShaderHeader() const;
// Accesses ShaderGen shader caches
const AbstractPipeline* GetPipelineForUid(const GXPipelineUid& uid);
const AbstractPipeline* GetUberPipelineForUid(const GXUberPipelineUid& uid);
@ -64,7 +68,48 @@ public:
// The optional will be empty if this pipeline is now background compiling.
std::optional<const AbstractPipeline*> GetPipelineForUidAsync(const GXPipelineUid& uid);
// Shared shaders
// The accessors below return non-owning pointers to shaders held by this cache. The shaders are
// created in CompileSharedPipelines(); a pointer is null if its shader has not been created.

// Vertex shader built from FramebufferShaderGen::GenerateScreenQuadVertexShader().
const AbstractShader* GetScreenQuadVertexShader() const
{
return m_screen_quad_vertex_shader.get();
}
// Vertex shader built from FramebufferShaderGen::GenerateTextureCopyVertexShader().
const AbstractShader* GetTextureCopyVertexShader() const
{
return m_texture_copy_vertex_shader.get();
}
// Vertex shader built from TextureConversionShaderGen::GenerateVertexShader().
const AbstractShader* GetEFBCopyVertexShader() const { return m_efb_copy_vertex_shader.get(); }
// Passthrough geometry shader built with GeneratePassthroughGeometryShader(1, 0).
// CompileSharedPipelines() only creates it when UseGeometryShaderForEFBCopies() is true,
// so this may be null.
const AbstractShader* GetTexcoordGeometryShader() const
{
return m_texcoord_geometry_shader.get();
}
// Pixel shader built from FramebufferShaderGen::GenerateTextureCopyPixelShader().
const AbstractShader* GetTextureCopyPixelShader() const
{
return m_texture_copy_pixel_shader.get();
}
// Passthrough geometry shader built with GeneratePassthroughGeometryShader(0, 1).
// CompileSharedPipelines() only creates it when UseGeometryShaderForEFBCopies() is true,
// so this may be null.
const AbstractShader* GetColorGeometryShader() const { return m_color_geometry_shader.get(); }
// Pixel shader built from FramebufferShaderGen::GenerateColorPixelShader().
const AbstractShader* GetColorPixelShader() const { return m_color_pixel_shader.get(); }
// EFB copy to RAM/VRAM pipelines
const AbstractPipeline*
GetEFBCopyToVRAMPipeline(const TextureConversionShaderGen::TCShaderUid& uid);
const AbstractPipeline* GetEFBCopyToRAMPipeline(const EFBCopyParams& uid);
// RGBA8 framebuffer copy pipelines
// Non-owning pointer to the texture copy pipeline created in CompileSharedPipelines() with
// the RGBA8 framebuffer state and no geometry shader.
const AbstractPipeline* GetRGBA8CopyPipeline() const { return m_copy_rgba8_pipeline.get(); }
// Same pipeline configuration with the texcoord geometry shader attached.
// CompileSharedPipelines() only creates it when UseGeometryShaderForEFBCopies() is true,
// so this may be null.
const AbstractPipeline* GetRGBA8StereoCopyPipeline() const
{
return m_rgba8_stereo_copy_pipeline.get();
}
// Palette texture conversion pipelines
const AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format);
// Texture decoding compute shaders
const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format);
private:
static constexpr size_t NUM_PALETTE_CONVERSION_SHADERS = 3;
void WaitForAsyncCompiler();
void LoadShaderCaches();
void ClearShaderCaches();
@ -74,6 +119,7 @@ private:
void InvalidateCachedPipelines();
void ClearPipelineCaches();
void QueueUberShaderPipelines();
bool CompileSharedPipelines();
// GX shader compiler methods
std::unique_ptr<AbstractShader> CompileVertexShader(const VertexShaderUid& uid) const;
@ -93,6 +139,9 @@ private:
const AbstractShader* CreateGeometryShader(const GeometryShaderUid& uid);
bool NeedsGeometryShader(const GeometryShaderUid& uid) const;
// Should we use geometry shaders for EFB copies?
bool UseGeometryShaderForEFBCopies() const;
// GX pipeline compiler methods
AbstractPipelineConfig
GetGXPipelineConfig(const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader,
@ -130,10 +179,17 @@ private:
// Configuration bits.
APIType m_api_type = APIType::Nothing;
ShaderHostConfig m_host_config = {};
AbstractTextureFormat m_efb_depth_format;
u32 m_efb_multisamples = 1;
std::unique_ptr<AsyncShaderCompiler> m_async_shader_compiler;
// Shared shaders
std::unique_ptr<AbstractShader> m_screen_quad_vertex_shader;
std::unique_ptr<AbstractShader> m_texture_copy_vertex_shader;
std::unique_ptr<AbstractShader> m_efb_copy_vertex_shader;
std::unique_ptr<AbstractShader> m_texcoord_geometry_shader;
std::unique_ptr<AbstractShader> m_color_geometry_shader;
std::unique_ptr<AbstractShader> m_texture_copy_pixel_shader;
std::unique_ptr<AbstractShader> m_color_pixel_shader;
// GX Shader Caches
template <typename Uid>
struct ShaderModuleCache
@ -157,6 +213,22 @@ private:
std::map<GXUberPipelineUid, std::pair<std::unique_ptr<AbstractPipeline>, bool>>
m_gx_uber_pipeline_cache;
File::IOFile m_gx_pipeline_uid_cache_file;
// EFB copy to VRAM/RAM pipelines
std::map<TextureConversionShaderGen::TCShaderUid, std::unique_ptr<AbstractPipeline>>
m_efb_copy_to_vram_pipelines;
std::map<EFBCopyParams, std::unique_ptr<AbstractPipeline>> m_efb_copy_to_ram_pipelines;
// Copy pipeline for RGBA8 textures
std::unique_ptr<AbstractPipeline> m_copy_rgba8_pipeline;
std::unique_ptr<AbstractPipeline> m_rgba8_stereo_copy_pipeline;
// Palette conversion pipelines
std::array<std::unique_ptr<AbstractPipeline>, NUM_PALETTE_CONVERSION_SHADERS>
m_palette_conversion_pipelines;
// Texture decoding shaders
std::map<std::pair<u32, u32>, std::unique_ptr<AbstractShader>> m_texture_decoding_shaders;
};
} // namespace VideoCommon

View File

@ -34,6 +34,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing;
bits.backend_shader_framebuffer_fetch = g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
bits.backend_logic_op = g_ActiveConfig.backend_info.bSupportsLogicOp;
bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion;
return bits;
}

View File

@ -181,7 +181,8 @@ union ShaderHostConfig
u32 backend_dynamic_sampler_indexing : 1;
u32 backend_shader_framebuffer_fetch : 1;
u32 backend_logic_op : 1;
u32 pad : 10;
u32 backend_palette_conversion : 1;
u32 pad : 9;
};
static ShaderHostConfig GetCurrent();
@ -216,7 +217,7 @@ template <class T>
inline void GenerateVSOutputMembers(T& object, APIType api_type, u32 texgens,
const ShaderHostConfig& host_config, const char* qualifier)
{
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "POSITION");
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "SV_Position");
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1);

View File

@ -28,17 +28,21 @@
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/FramebufferManagerBase.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/HiresTextures.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/ShaderCache.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureConverterShaderGen.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
@ -51,8 +55,9 @@ std::unique_ptr<TextureCacheBase> g_texture_cache;
std::bitset<8> TextureCacheBase::valid_bind_points;
TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr<AbstractTexture> tex)
: texture(std::move(tex))
TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr<AbstractTexture> tex,
std::unique_ptr<AbstractFramebuffer> fb)
: texture(std::move(tex)), framebuffer(std::move(fb))
{
}
@ -89,6 +94,25 @@ TextureCacheBase::TextureCacheBase()
InvalidateAllBindPoints();
}
// Destructor: shuts down HiresTexture, invalidates the cache (Invalidate() flushes pending
// EFB copies and clears the texture pool), then releases the aligned `temp` buffer.
TextureCacheBase::~TextureCacheBase()
{
HiresTexture::Shutdown();
Invalidate();
Common::FreeAlignedMemory(temp);
// Reset the pointer so it does not dangle after the buffer is freed.
temp = nullptr;
}
// Creates the utility textures needed by the texture cache. Raises a panic alert and returns
// false if they cannot be created; returns true otherwise.
bool TextureCacheBase::Initialize()
{
  if (CreateUtilityTextures())
    return true;

  PanicAlert("Failed to create utility textures.");
  return false;
}
void TextureCacheBase::Invalidate()
{
FlushEFBCopies();
@ -108,14 +132,6 @@ void TextureCacheBase::Invalidate()
texture_pool.clear();
}
// Destructor: shuts down HiresTexture, invalidates the cache, then releases the aligned
// `temp` buffer.
TextureCacheBase::~TextureCacheBase()
{
HiresTexture::Shutdown();
Invalidate();
Common::FreeAlignedMemory(temp);
// Reset the pointer so it does not dangle after the buffer is freed.
temp = nullptr;
}
void TextureCacheBase::OnConfigChanged(VideoConfig& config)
{
if (config.bHiresTextures != backup_config.hires_textures ||
@ -139,14 +155,6 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config)
g_ActiveConfig.bTexFmtOverlayCenter);
}
if ((config.stereo_mode != StereoMode::Off) != backup_config.stereo_3d ||
config.bStereoEFBMonoDepth != backup_config.efb_mono_depth)
{
g_texture_cache->DeleteShaders();
if (!g_texture_cache->CompileShaders())
PanicAlert("Failed to recompile one or more texture conversion shaders.");
}
SetBackupConfig(config);
}
@ -243,7 +251,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma
{
TextureConfig new_config = entry->texture->GetConfig();
new_config.levels = 1;
new_config.rendertarget = true;
new_config.flags |= AbstractTextureFlag_RenderTarget;
TCacheEntry* decoded_entry = AllocateCacheEntry(new_config);
if (!decoded_entry)
@ -279,29 +287,27 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e
return;
}
TextureConfig newconfig;
newconfig.width = new_width;
newconfig.height = new_height;
newconfig.layers = entry->GetNumLayers();
newconfig.rendertarget = true;
std::unique_ptr<AbstractTexture> new_texture = AllocateTexture(newconfig);
if (new_texture)
const TextureConfig newconfig(new_width, new_height, 1, entry->GetNumLayers(), 1,
AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget);
std::optional<TexPoolEntry> new_texture = AllocateTexture(newconfig);
if (!new_texture)
{
new_texture->ScaleRectangleFromTexture(entry->texture.get(),
entry->texture->GetConfig().GetRect(),
new_texture->GetConfig().GetRect());
entry->texture.swap(new_texture);
ERROR_LOG(VIDEO, "Scaling failed due to texture allocation failure");
return;
}
auto config = new_texture->GetConfig();
// At this point new_texture has the old texture in it,
// we can potentially reuse this, so let's move it back to the pool
texture_pool.emplace(config, TexPoolEntry(std::move(new_texture)));
}
else
{
ERROR_LOG(VIDEO, "Scaling failed");
}
// No need to convert the coordinates here since they'll be the same.
g_renderer->ScaleTexture(new_texture->framebuffer.get(),
new_texture->texture->GetConfig().GetRect(), entry->texture.get(),
entry->texture->GetConfig().GetRect());
entry->texture.swap(new_texture->texture);
entry->framebuffer.swap(new_texture->framebuffer);
// At this point new_texture has the old texture in it,
// we can potentially reuse this, so let's move it back to the pool
auto config = new_texture->texture->GetConfig();
texture_pool.emplace(
config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer)));
}
TextureCacheBase::TCacheEntry*
@ -747,8 +753,6 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
entry->frameCount = FRAMECOUNT_INVALID;
bound_textures[stage] = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true);
// We need to keep track of invalidated textures until they have actually been replaced or
// re-loaded
valid_bind_points.set(stage);
@ -1036,25 +1040,17 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
// banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(texformat, tlutfmt) &&
!(from_tmem && texformat == TextureFormat::RGBA8);
const bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
!(from_tmem && texformat == TextureFormat::RGBA8);
// create the entry/texture
TextureConfig config;
config.width = width;
config.height = height;
config.levels = texLevels;
config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8;
ArbitraryMipmapDetector arbitrary_mip_detector;
const TextureConfig config(width, height, texLevels, 1, 1,
hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8, 0);
TCacheEntry* entry = AllocateCacheEntry(config);
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
if (!entry)
return nullptr;
ArbitraryMipmapDetector arbitrary_mip_detector;
const u8* tlut = &texMem[tlutaddr];
if (hires_tex)
{
@ -1068,14 +1064,10 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
if (!hires_tex)
{
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width,
height, expandedWidth, expandedHeight, row_stride, tlut,
tlutfmt);
}
else
if (!decode_on_gpu ||
!DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height,
expandedWidth, expandedHeight, bytes_per_block * (expandedWidth / bsw),
tlut, tlutfmt))
{
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
@ -1168,20 +1160,16 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh);
const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data;
size_t mip_size =
const u32 mip_size =
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expanded_mip_width / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat,
mip_width, mip_height, expanded_mip_width,
expanded_mip_height, row_stride, tlut, tlutfmt);
}
else
if (!decode_on_gpu ||
!DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat, mip_width,
mip_height, expanded_mip_width, expanded_mip_height,
bytes_per_block * (expanded_mip_width / bsw), tlut, tlutfmt))
{
// No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning
size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
const u32 decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
TexDecoder_Decode(dst_buffer, mip_src_data, expanded_mip_width, expanded_mip_height,
texformat, tlut, tlutfmt);
entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, dst_buffer,
@ -1212,6 +1200,8 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
// This should only be needed if the texture was updated, or used GPU decoding.
entry->texture->FinishedRendering();
return entry;
}
@ -1379,7 +1369,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati
// or as a container for overlapping textures, never need to be combined
// with other textures
TCacheEntry* stitched_entry =
CreateNormalTexture(tex_info, FramebufferManagerBase::GetEFBLayers());
CreateNormalTexture(tex_info, g_framebuffer_manager->GetEFBLayers());
stitched_entry->may_have_overlapping_textures = false;
// It is possible that some of the overlapping textures overlap each other.
@ -1540,6 +1530,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati
return nullptr;
}
stitched_entry->texture->FinishedRendering();
return stitched_entry;
}
@ -1547,17 +1538,10 @@ TextureCacheBase::TCacheEntry*
TextureCacheBase::CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers)
{
// create the entry/texture
TextureConfig config;
config.width = tex_info.native_width;
config.height = tex_info.native_height;
config.levels = tex_info.computed_levels;
config.format = AbstractTextureFormat::RGBA8;
config.rendertarget = true;
config.layers = layers;
const TextureConfig config(tex_info.native_width, tex_info.native_height,
tex_info.computed_levels, layers, 1, AbstractTextureFormat::RGBA8,
AbstractTextureFlag_RenderTarget);
TCacheEntry* entry = AllocateCacheEntry(config);
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
if (!entry)
return nullptr;
@ -1590,15 +1574,15 @@ TextureCacheBase::GetTextureFromMemory(const TextureLookupInformation& tex_info)
// banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
bool decode_on_gpu = g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(tex_info.full_format.texfmt,
tex_info.full_format.tlutfmt) &&
!(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8);
const bool decode_on_gpu =
g_ActiveConfig.UseGPUTextureDecoding() &&
!(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8);
// Since it's coming from RAM, it can only have one layer (no stereo).
TCacheEntry* entry = CreateNormalTexture(tex_info, 1);
entry->may_have_overlapping_textures = false;
LoadTextureLevelZeroFromMemory(entry, tex_info, decode_on_gpu);
entry->texture->FinishedRendering();
return entry;
}
@ -1608,15 +1592,13 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
{
const u8* tlut = &texMem[tex_info.tlut_address];
if (decode_on_gpu)
{
u32 row_stride = tex_info.bytes_per_block * (tex_info.expanded_width / tex_info.block_width);
g_texture_cache->DecodeTextureOnGPU(
entry_to_update, 0, tex_info.src_data, tex_info.total_bytes, tex_info.full_format.texfmt,
tex_info.native_width, tex_info.native_height, tex_info.expanded_width,
tex_info.expanded_height, row_stride, tlut, tex_info.full_format.tlutfmt);
}
else
// Decode on the CPU when GPU decoding is not requested or when the GPU path reports failure.
// The result of DecodeTextureOnGPU() must be negated here, as at the other call sites in this
// file; otherwise a successful GPU decode is followed by a redundant CPU decode and a failed
// one leaves the texture without data.
if (!decode_on_gpu ||
    !DecodeTextureOnGPU(entry_to_update, 0, tex_info.src_data, tex_info.total_bytes,
                        tex_info.full_format.texfmt, tex_info.native_width, tex_info.native_height,
                        tex_info.expanded_width, tex_info.expanded_height,
                        tex_info.bytes_per_block *
                            (tex_info.expanded_width / tex_info.block_width),
                        tlut, tex_info.full_format.tlutfmt))
{
size_t decoded_texture_size = tex_info.expanded_width * sizeof(u32) * tex_info.expanded_height;
CheckTempSize(decoded_texture_size);
@ -1637,12 +1619,12 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
}
}
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients(
const CopyFilterCoefficients::Values& coefficients) const
EFBCopyFilterCoefficients
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return {{
return EFBCopyFilterCoefficients{
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
@ -1650,31 +1632,31 @@ TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterC
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f,
}};
};
}
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
const CopyFilterCoefficients::Values& coefficients) const
EFBCopyFilterCoefficients
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients);
EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
if (!g_ActiveConfig.bDisableCopyFilter)
return res;
// Disabling the copy filter in options should not ignore the values the game sets completely,
// as some games use the filter coefficients to control the brightness of the screen. Instead,
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
res[1] += res[0] + res[2];
res[0] = 0;
res[2] = 0;
res.middle = res.upper + res.middle + res.lower;
res.upper = 0.0f;
res.lower = 0.0f;
return res;
}
bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const
bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
{
// If the top/bottom coefficients are zero, no point sampling/blending from these rows.
return coefficients[0] != 0 || coefficients[2] != 0;
return coefficients.upper != 0 || coefficients.lower != 0;
}
void TextureCacheBase::CopyRenderTargetToTexture(
@ -1816,12 +1798,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_vram)
{
// create the texture
TextureConfig config;
config.rendertarget = true;
config.width = scaled_tex_w;
config.height = scaled_tex_h;
config.layers = FramebufferManagerBase::GetEFBLayers();
const TextureConfig config(scaled_tex_w, scaled_tex_h, 1, g_framebuffer_manager->GetEFBLayers(),
1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget);
entry = AllocateCacheEntry(config);
if (entry)
{
@ -1866,7 +1844,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_ram)
{
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
@ -2006,11 +1984,6 @@ void TextureCacheBase::FlushEFBCopies()
m_pending_efb_copies.clear();
}
// Returns the fixed texture configuration that GetEFBCopyStagingTexture() passes to
// CreateStagingTexture() for readback: (EFB_WIDTH * 4) x 1024, BGRA8. Going by the other
// TextureConfig constructor calls in this file, the three 1s are levels, layers and samples.
// NOTE(review): the trailing `true` presumably marks the texture as a render target - confirm
// against the TextureConfig constructor.
TextureConfig TextureCacheBase::GetEncodingTextureConfig()
{
return TextureConfig(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, true);
}
void TextureCacheBase::WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride,
std::unique_ptr<AbstractStagingTexture> staging_texture)
{
@ -2069,8 +2042,8 @@ std::unique_ptr<AbstractStagingTexture> TextureCacheBase::GetEFBCopyStagingTextu
return ptr;
}
std::unique_ptr<AbstractStagingTexture> tex =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, GetEncodingTextureConfig());
std::unique_ptr<AbstractStagingTexture> tex = g_renderer->CreateStagingTexture(
StagingTextureType::Readback, m_efb_encoding_texture->GetConfig());
if (!tex)
WARN_LOG(VIDEO, "Failed to create EFB copy staging texture");
@ -2127,37 +2100,50 @@ void TextureCacheBase::UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_
TextureCacheBase::TCacheEntry* TextureCacheBase::AllocateCacheEntry(const TextureConfig& config)
{
std::unique_ptr<AbstractTexture> texture = AllocateTexture(config);
if (!texture)
{
std::optional<TexPoolEntry> alloc = AllocateTexture(config);
if (!alloc)
return nullptr;
}
TCacheEntry* cacheEntry = new TCacheEntry(std::move(texture));
TCacheEntry* cacheEntry =
new TCacheEntry(std::move(alloc->texture), std::move(alloc->framebuffer));
cacheEntry->textures_by_hash_iter = textures_by_hash.end();
cacheEntry->id = last_entry_id++;
return cacheEntry;
}
std::unique_ptr<AbstractTexture> TextureCacheBase::AllocateTexture(const TextureConfig& config)
std::optional<TextureCacheBase::TexPoolEntry>
TextureCacheBase::AllocateTexture(const TextureConfig& config)
{
TexPool::iterator iter = FindMatchingTextureFromPool(config);
std::unique_ptr<AbstractTexture> entry;
if (iter != texture_pool.end())
{
entry = std::move(iter->second.texture);
auto entry = std::move(iter->second);
texture_pool.erase(iter);
return std::move(entry);
}
else
std::unique_ptr<AbstractTexture> texture = g_renderer->CreateTexture(config);
if (!texture)
{
entry = g_renderer->CreateTexture(config);
if (!entry)
return nullptr;
INCSTAT(stats.numTexturesCreated);
WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u texture", config.width, config.height,
config.layers);
return {};
}
return entry;
std::unique_ptr<AbstractFramebuffer> framebuffer;
if (config.IsRenderTarget())
{
framebuffer = g_renderer->CreateFramebuffer(texture.get(), nullptr);
if (!framebuffer)
{
WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u framebuffer", config.width, config.height,
config.layers);
return {};
}
}
INCSTAT(stats.numTexturesCreated);
return TexPoolEntry(std::move(texture), std::move(framebuffer));
}
TextureCacheBase::TexPool::iterator
@ -2170,7 +2156,7 @@ TextureCacheBase::FindMatchingTextureFromPool(const TextureConfig& config)
// As non-render-target textures are usually static, this should not matter much.
auto range = texture_pool.equal_range(config);
auto matching_iter = std::find_if(range.first, range.second, [](const auto& iter) {
return iter.first.rendertarget || iter.second.frameCount != FRAMECOUNT_INVALID;
return iter.first.IsRenderTarget() || iter.second.frameCount != FRAMECOUNT_INVALID;
});
return matching_iter != range.second ? matching_iter : texture_pool.end();
}
@ -2261,7 +2247,8 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe
}
auto config = entry->texture->GetConfig();
texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture)));
texture_pool.emplace(config,
TexPoolEntry(std::move(entry->texture), std::move(entry->framebuffer)));
// Don't delete if there's a pending EFB copy, as we need the TCacheEntry alive.
if (!entry->pending_efb_copy)
@ -2270,6 +2257,283 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe
return textures_by_address.erase(iter);
}
bool TextureCacheBase::CreateUtilityTextures()
{
constexpr TextureConfig encoding_texture_config(
EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, AbstractTextureFlag_RenderTarget);
m_efb_encoding_texture = g_renderer->CreateTexture(encoding_texture_config);
if (!m_efb_encoding_texture)
return false;
m_efb_encoding_framebuffer = g_renderer->CreateFramebuffer(m_efb_encoding_texture.get(), nullptr);
if (!m_efb_encoding_framebuffer)
return false;
if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding)
{
constexpr TextureConfig decoding_texture_config(
1024, 1024, 1, 1, 1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_ComputeImage);
m_decoding_texture = g_renderer->CreateTexture(decoding_texture_config);
if (!m_decoding_texture)
return false;
}
return true;
}
// Renders a rectangle of the EFB (color, or depth when is_depth_copy is set) into the
// framebuffer owned by `entry`, using the EFB-copy-to-VRAM pipeline selected by
// dst_format / is_depth_copy / is_intensity / scale_by_half and whether the copy filter is
// needed in the shader. If that pipeline is unavailable, a warning is logged and nothing is drawn.
// NOTE(review): entry->framebuffer is dereferenced unconditionally below, so callers are
// presumably expected to pass an entry whose texture is a render target - confirm.
void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline =
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
dst_format, is_depth_copy, is_intensity, scale_by_half,
NeedsCopyFilterInShader(filter_coefficients)));
if (!copy_pipeline)
{
WARN_LOG(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
return;
}
// Convert the native-resolution rectangle, then obtain the EFB texture to sample from.
const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect);
AbstractTexture* src_texture =
is_depth_copy ? g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) :
g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect);
g_renderer->BeginUtilityDrawing();
// Fill uniform buffer.
// NOTE(review): the member order and trailing padding presumably mirror the uniform block
// declared by the generated copy shader - keep both in sync.
struct Uniforms
{
float src_left, src_top, src_width, src_height;
float filter_coefficients[3];
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float pixel_height;
u32 padding;
};
Uniforms uniforms;
const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle(
scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer());
// Source rectangle is expressed as a fraction of the EFB texture dimensions.
const float rcp_efb_width = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBWidth());
const float rcp_efb_height = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBHeight());
uniforms.src_left = framebuffer_rect.left * rcp_efb_width;
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
uniforms.filter_coefficients[0] = filter_coefficients.upper;
uniforms.filter_coefficients[1] = filter_coefficients.middle;
uniforms.filter_coefficients[2] = filter_coefficients.lower;
uniforms.gamma_rcp = 1.0f / gamma;
// When clamping is disabled the limits fall back to the full 0..1 range.
uniforms.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f;
uniforms.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f;
// One row in normalized coordinates: a scaled-EFB row when copies are scaled, otherwise a
// native-resolution row.
uniforms.pixel_height = g_ActiveConfig.bCopyEFBScaled ? rcp_efb_height : 1.0f / EFB_HEIGHT;
uniforms.padding = 0;
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
// Use the copy pipeline to render the VRAM copy.
g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get());
g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect());
g_renderer->SetPipeline(copy_pipeline);
g_renderer->SetTexture(0, src_texture);
// Linear sampling is only used when the copy is scaled by half.
g_renderer->SetSamplerState(0, scale_by_half ? RenderState::GetLinearSamplerState() :
RenderState::GetPointSamplerState());
// Three vertices, no vertex buffer bound here - presumably a single screen-covering triangle.
g_renderer->Draw(0, 3);
g_renderer->EndUtilityDrawing();
entry->texture->FinishedRendering();
}
// Encodes a rectangle of the EFB (color, or depth when params.depth is set) with the
// EFB-copy-to-RAM pipeline for `params`, rendering into the shared encoding framebuffer and
// then copying the encoded rows into the staging texture `dst` for CPU readback.
//
// native_width            - forwarded to the shader as position.z.
// bytes_per_row           - bytes in one encoded row; the render width is bytes_per_row / 4.
// num_blocks_y            - number of encoded rows; used as the render height.
// memory_stride           - not used by this implementation.
// scale_by_half, y_scale  - scaling parameters forwarded to the shader; also select filtering.
// gamma                   - its reciprocal is forwarded to the shader.
// clamp_top, clamp_bottom - restrict sampling to the source rectangle's top/bottom edge.
//
// If the pipeline is unavailable, a warning is logged and `dst` is left untouched.
void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params,
                               u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
                               u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
                               float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
                               const EFBCopyFilterCoefficients& filter_coefficients)
{
  // Flush EFB pokes first, as they're expected to be included.
  g_framebuffer_manager->FlushEFBPokes();

  // Get the pipeline which we will be using. If the compilation failed, this will be null.
  const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToRAMPipeline(params);
  if (!copy_pipeline)
  {
    // This is the RAM copy path; the message previously said "VRAM" (copied from the sibling).
    WARN_LOG(VIDEO, "Skipping EFB copy to RAM due to missing pipeline.");
    return;
  }

  const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect);
  AbstractTexture* src_texture =
      params.depth ? g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) :
                     g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect);

  g_renderer->BeginUtilityDrawing();

  // Fill uniform buffer.
  // NOTE(review): member order and the trailing padding presumably mirror the uniform block
  // declared by the generated encoding shader - keep both in sync.
  struct Uniforms
  {
    std::array<s32, 4> position_uniform;
    float y_scale;
    float gamma_rcp;
    float clamp_top;
    float clamp_bottom;
    float filter_coefficients[3];
    u32 padding;
  };
  Uniforms encoder_params;
  const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle(
      scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer());
  const float rcp_efb_height = 1.0f / static_cast<float>(g_framebuffer_manager->GetEFBHeight());
  encoder_params.position_uniform[0] = scaled_src_rect.left;
  encoder_params.position_uniform[1] = scaled_src_rect.top;
  encoder_params.position_uniform[2] = static_cast<s32>(native_width);
  encoder_params.position_uniform[3] = scale_by_half ? 2 : 1;
  encoder_params.y_scale = y_scale;
  encoder_params.gamma_rcp = 1.0f / gamma;
  // When clamping is disabled the limits fall back to the full 0..1 range.
  encoder_params.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f;
  encoder_params.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f;
  encoder_params.filter_coefficients[0] = filter_coefficients.upper;
  encoder_params.filter_coefficients[1] = filter_coefficients.middle;
  encoder_params.filter_coefficients[2] = filter_coefficients.lower;
  // Zero the padding so that no indeterminate bytes are uploaded with the struct.
  encoder_params.padding = 0;
  g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));

  // We also use linear filtering for both box filtering and downsampling higher resolutions to 1x.
  // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
  //       complex down filtering to average all pixels and produce the correct result.
  const bool linear_filter =
      (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f;

  // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
  const u32 render_width = bytes_per_row / sizeof(u32);
  const u32 render_height = num_blocks_y;
  const auto encode_rect = MathUtil::Rectangle<int>(0, 0, render_width, render_height);

  // Render to GPU texture, and then copy to CPU-accessible texture.
  g_renderer->SetAndDiscardFramebuffer(m_efb_encoding_framebuffer.get());
  g_renderer->SetViewportAndScissor(encode_rect);
  g_renderer->SetPipeline(copy_pipeline);
  g_renderer->SetTexture(0, src_texture);
  g_renderer->SetSamplerState(0, linear_filter ? RenderState::GetLinearSamplerState() :
                                                 RenderState::GetPointSamplerState());
  g_renderer->Draw(0, 3);
  dst->CopyFromTexture(m_efb_encoding_texture.get(), encode_rect, 0, 0, encode_rect);
  g_renderer->EndUtilityDrawing();

  // Flush if there's sufficient draws between this copy and the last.
  g_vertex_manager->OnEFBCopyToRAM();
}
// Applies a palette to `unconverted`, rendering the result into `entry`.
//
// entry       - destination; must be a render target with a framebuffer (asserted in debug).
// unconverted - source texture holding the palette indices; its format selects the palette
//               size (32 bytes for I4, 512 otherwise) and the index multiplier.
// palette     - palette data uploaded to the texel buffer as R16_UINT elements.
// format      - palette (TLUT) format; selects the conversion pipeline.
//
// Returns false if the backend does not support palette conversion or the palette upload
// fails; returns true once the conversion draw has been issued.
bool TextureCacheBase::ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted,
                                      const void* palette, TLUTFormat format)
{
  DEBUG_ASSERT(entry->texture->GetConfig().IsRenderTarget() && entry->framebuffer);
  if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion)
  {
    ERROR_LOG(VIDEO, "Backend does not support palette conversion!");
    return false;
  }

  g_renderer->BeginUtilityDrawing();

  const u32 palette_size = unconverted->format == TextureFormat::I4 ? 32 : 512;
  u32 texel_buffer_offset;
  if (!g_vertex_manager->UploadTexelBuffer(palette, palette_size,
                                           TexelBufferFormat::TEXEL_BUFFER_FORMAT_R16_UINT,
                                           &texel_buffer_offset))
  {
    ERROR_LOG(VIDEO, "Texel buffer upload failed");
    // Utility drawing was already begun above; close it so the begin/end pair stays balanced
    // on this early-out path.
    g_renderer->EndUtilityDrawing();
    return false;
  }

  // Zero-initialized so the padding words are uploaded with a defined value.
  struct Uniforms
  {
    float multiplier;
    u32 texel_buffer_offset;
    u32 pad[2];
  };
  static_assert(std::is_standard_layout<Uniforms>::value);
  Uniforms uniforms = {};
  uniforms.multiplier = unconverted->format == TextureFormat::I4 ? 15.0f : 255.0f;
  uniforms.texel_buffer_offset = texel_buffer_offset;
  g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));

  g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get());
  g_renderer->SetViewportAndScissor(entry->texture->GetRect());
  g_renderer->SetPipeline(g_shader_cache->GetPaletteConversionPipeline(format));
  // The index texture is bound to slot 1, not slot 0.
  g_renderer->SetTexture(1, unconverted->texture.get());
  g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState());
  g_renderer->Draw(0, 3);
  g_renderer->EndUtilityDrawing();
  entry->texture->FinishedRendering();
  return true;
}
// Decodes guest texture data into mip level `dst_level` of entry's texture with a compute shader.
// The source data (and the palette, when the format's decoding info declares a palette size) is
// uploaded to the texel buffer, the shader writes into the shared decoding texture, and the
// result is then copied into the destination level.
// Returns false without dispatching if there is no decoding info or shader for the
// format/palette combination, or if the texel buffer upload fails; returns true otherwise.
bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
u32 data_size, TextureFormat format, u32 width,
u32 height, u32 aligned_width, u32 aligned_height,
u32 row_stride, const u8* palette,
TLUTFormat palette_format)
{
const auto* info = TextureConversionShaderTiled::GetDecodingShaderInfo(format);
if (!info)
return false;
const AbstractShader* shader = g_shader_cache->GetTextureDecodingShader(format, palette_format);
if (!shader)
return false;
// Copy to GPU-visible buffer, aligned to the data type.
const u32 bytes_per_buffer_elem =
VertexManagerBase::GetTexelBufferElementSize(info->buffer_format);
// Allocate space in stream buffer, and copy texture + palette across.
u32 src_offset = 0, palette_offset = 0;
if (info->palette_size > 0)
{
// Paletted format: upload texture data and palette in one call; the palette is R16_UINT.
if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset,
palette, info->palette_size,
TEXEL_BUFFER_FORMAT_R16_UINT, &palette_offset))
{
return false;
}
}
else
{
if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset))
return false;
}
// Set up uniforms.
// The row stride is converted from bytes to texel buffer elements. The trailing `unused`
// member has no initializer below and is therefore zero-initialized.
struct Uniforms
{
u32 dst_width, dst_height;
u32 src_width, src_height;
u32 src_offset, src_row_stride;
u32 palette_offset, unused;
} uniforms = {width, height, aligned_width,
aligned_height, src_offset, row_stride / bytes_per_buffer_elem,
palette_offset};
g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms));
// NOTE(review): the two flags are presumably (read, write), i.e. write-only access - confirm
// against the SetComputeImageTexture declaration.
g_renderer->SetComputeImageTexture(m_decoding_texture.get(), false, true);
auto dispatch_groups =
TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height);
g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1);
// Copy from decoding texture -> final texture
// This is because we don't want to have to create compute view for every layer
const auto copy_rect = entry->texture->GetConfig().GetMipRect(dst_level);
entry->texture->CopyRectangleFromTexture(m_decoding_texture.get(), copy_rect, 0, 0, copy_rect, 0,
dst_level);
entry->texture->FinishedRendering();
return true;
}
u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
{
const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt);
@ -2362,3 +2626,9 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const
return temp_hash;
}
}
TextureCacheBase::TexPoolEntry::TexPoolEntry(std::unique_ptr<AbstractTexture> tex,
std::unique_ptr<AbstractFramebuffer> fb)
: texture(std::move(tex)), framebuffer(std::move(fb))
{
}

View File

@ -23,6 +23,7 @@
#include "VideoCommon/VideoCommon.h"
struct VideoConfig;
class AbstractFramebuffer;
class AbstractStagingTexture;
struct TextureAndTLUTFormat
@ -68,6 +69,14 @@ struct EFBCopyParams
bool copy_filter;
};
// Reduced version of the full coefficient array, with a single value for each row.
// The EFB copy routines upload these to the shader in the order upper, middle, lower.
struct EFBCopyFilterCoefficients
{
// NOTE(review): presumably the weight of the row above the current one - confirm.
float upper;
// NOTE(review): presumably the weight of the current row - confirm.
float middle;
// NOTE(review): presumably the weight of the row below the current one - confirm.
float lower;
};
struct TextureLookupInformation
{
u32 address;
@ -110,13 +119,11 @@ private:
static const int FRAMECOUNT_INVALID = 0;
public:
// Reduced version of the full coefficient array, reduced to a single value for each row.
using CopyFilterCoefficientArray = std::array<float, 3>;
struct TCacheEntry
{
// common members
std::unique_ptr<AbstractTexture> texture;
std::unique_ptr<AbstractFramebuffer> framebuffer;
u32 addr;
u32 size_in_bytes;
u64 base_hash;
@ -157,7 +164,8 @@ public:
u32 pending_efb_copy_height = 0;
bool pending_efb_copy_invalidated = false;
explicit TCacheEntry(std::unique_ptr<AbstractTexture> tex);
explicit TCacheEntry(std::unique_ptr<AbstractTexture> tex,
std::unique_ptr<AbstractFramebuffer> fb);
~TCacheEntry();
@ -214,7 +222,10 @@ public:
AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; }
};
virtual ~TextureCacheBase(); // needs virtual for DX11 dtor
TextureCacheBase();
virtual ~TextureCacheBase();
bool Initialize();
void OnConfigChanged(VideoConfig& config);
@ -224,15 +235,6 @@ public:
void Invalidate();
virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) = 0;
virtual bool CompileShaders() = 0;
virtual void DeleteShaders() = 0;
TCacheEntry* Load(const u32 stage);
static void InvalidateAllBindPoints() { valid_bind_points.reset(); }
static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); }
@ -262,39 +264,39 @@ public:
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficients::Values& filter_coefficients);
virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TLUTFormat format) = 0;
// Returns true if the texture data and palette formats are supported by the GPU decoder.
virtual bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
{
return false;
}
// Decodes the specified data to the GPU texture specified by entry.
// width, height are the size of the image in pixels.
// aligned_width, aligned_height are the size of the image in pixels, aligned to the block size.
// row_stride is the number of bytes for a row of blocks, not pixels.
virtual void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TLUTFormat palette_format)
{
}
void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height);
// Flushes all pending EFB copies to emulated RAM.
void FlushEFBCopies();
// Returns a texture config suitable for drawing a RAM EFB copy into.
static TextureConfig GetEncodingTextureConfig();
// Returns false if the top/bottom row coefficients are zero.
static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);
protected:
TextureCacheBase();
// Applies a palette to an EFB copy/texture.
bool ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TLUTFormat format);
// Returns false if the top/bottom row coefficients are zero.
bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const;
// Decodes the specified data to the GPU texture specified by entry.
// Returns false if the configuration is not supported.
// width, height are the size of the image in pixels.
// aligned_width, aligned_height are the size of the image in pixels, aligned to the block size.
// row_stride is the number of bytes for a row of blocks, not pixels.
bool DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, u32 data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TLUTFormat palette_format);
virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients);
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients);
alignas(16) u8* temp = nullptr;
size_t temp_size = 0;
@ -307,13 +309,17 @@ private:
struct TexPoolEntry
{
std::unique_ptr<AbstractTexture> texture;
std::unique_ptr<AbstractFramebuffer> framebuffer;
int frameCount = FRAMECOUNT_INVALID;
TexPoolEntry(std::unique_ptr<AbstractTexture> tex) : texture(std::move(tex)) {}
TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
};
using TexAddrCache = std::multimap<u32, TCacheEntry*>;
using TexHashCache = std::multimap<u64, TCacheEntry*>;
using TexPool = std::unordered_multimap<TextureConfig, TexPoolEntry>;
bool CreateUtilityTextures();
void SetBackupConfig(const VideoConfig& config);
TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
@ -325,7 +331,7 @@ private:
void CheckTempSize(size_t required_size);
TCacheEntry* AllocateCacheEntry(const TextureConfig& config);
std::unique_ptr<AbstractTexture> AllocateTexture(const TextureConfig& config);
std::optional<TexPoolEntry> AllocateTexture(const TextureConfig& config);
TexPool::iterator FindMatchingTextureFromPool(const TextureConfig& config);
TexAddrCache::iterator GetTexCacheIter(TCacheEntry* entry);
@ -334,12 +340,6 @@ private:
std::pair<TexAddrCache::iterator, TexAddrCache::iterator>
FindOverlappingTextures(u32 addr, u32 size_in_bytes);
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) = 0;
// Removes and unlinks texture from texture cache and returns it to the pool
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter,
bool discard_pending_efb_copy = false);
@ -347,10 +347,10 @@ private:
void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
CopyFilterCoefficientArray
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
CopyFilterCoefficientArray
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
static EFBCopyFilterCoefficients
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
static EFBCopyFilterCoefficients
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
// Flushes a pending EFB copy to RAM from the host to the guest RAM.
void WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride,
@ -385,6 +385,13 @@ private:
};
BackupConfig backup_config = {};
// Encoding texture used for EFB copies to RAM.
std::unique_ptr<AbstractTexture> m_efb_encoding_texture;
std::unique_ptr<AbstractFramebuffer> m_efb_encoding_framebuffer;
// Decoding texture used for GPU texture decoding.
std::unique_ptr<AbstractTexture> m_decoding_texture;
// Pool of readback textures used for deferred EFB copies.
std::vector<std::unique_ptr<AbstractStagingTexture>> m_efb_copy_staging_texture_pool;

View File

@ -9,8 +9,8 @@
bool TextureConfig::operator==(const TextureConfig& o) const
{
return std::tie(width, height, levels, layers, samples, format, rendertarget) ==
std::tie(o.width, o.height, o.levels, o.layers, o.samples, o.format, o.rendertarget);
return std::tie(width, height, levels, layers, samples, format, flags) ==
std::tie(o.width, o.height, o.levels, o.layers, o.samples, o.format, o.flags);
}
bool TextureConfig::operator!=(const TextureConfig& o) const
@ -38,8 +38,3 @@ size_t TextureConfig::GetMipStride(u32 level) const
{
return AbstractTexture::CalculateStrideForFormat(format, std::max(width >> level, 1u));
}
bool TextureConfig::IsMultisampled() const
{
return samples > 1;
}

View File

@ -34,13 +34,19 @@ enum class StagingTextureType
Mutable // Optimize for CPU reads, GPU writes, allow slow CPU reads
};
// Bit flags describing how a texture will be used. They are combined into
// TextureConfig::flags and tested by TextureConfig::IsRenderTarget() / IsComputeImage().
enum AbstractTextureFlag : u32
{
AbstractTextureFlag_RenderTarget = (1 << 0), // Texture is used as a framebuffer.
AbstractTextureFlag_ComputeImage = (1 << 1), // Texture is used as a compute image.
};
struct TextureConfig
{
constexpr TextureConfig() = default;
constexpr TextureConfig(u32 width_, u32 height_, u32 levels_, u32 layers_, u32 samples_,
AbstractTextureFormat format_, bool rendertarget_)
AbstractTextureFormat format_, u32 flags_)
: width(width_), height(height_), levels(levels_), layers(layers_), samples(samples_),
format(format_), rendertarget(rendertarget_)
format(format_), flags(flags_)
{
}
@ -50,7 +56,10 @@ struct TextureConfig
MathUtil::Rectangle<int> GetMipRect(u32 level) const;
size_t GetStride() const;
size_t GetMipStride(u32 level) const;
bool IsMultisampled() const;
bool IsMultisampled() const { return samples > 1; }
bool IsRenderTarget() const { return (flags & AbstractTextureFlag_RenderTarget) != 0; }
bool IsComputeImage() const { return (flags & AbstractTextureFlag_ComputeImage) != 0; }
u32 width = 0;
u32 height = 0;
@ -58,7 +67,7 @@ struct TextureConfig
u32 layers = 1;
u32 samples = 1;
AbstractTextureFormat format = AbstractTextureFormat::RGBA8;
bool rendertarget = false;
u32 flags = 0;
};
namespace std
@ -71,7 +80,7 @@ struct hash<TextureConfig>
result_type operator()(const argument_type& c) const noexcept
{
const u64 id = static_cast<u64>(c.rendertarget) << 63 | static_cast<u64>(c.format) << 50 |
const u64 id = static_cast<u64>(c.flags) << 58 | static_cast<u64>(c.format) << 50 |
static_cast<u64>(c.layers) << 48 | static_cast<u64>(c.levels) << 32 |
static_cast<u64>(c.height) << 16 | static_cast<u64>(c.width);
return std::hash<u64>{}(id);

View File

@ -15,7 +15,9 @@
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
#define WRITE p += sprintf
@ -59,21 +61,10 @@ u16 GetEncodedSampleCount(EFBCopyFormat format)
static void WriteHeader(char*& p, APIType ApiType)
{
if (ApiType == APIType::OpenGL)
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
// left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2)
WRITE(p, "uniform int4 position;\n");
WRITE(p, "uniform float y_scale;\n");
WRITE(p, "uniform float gamma_rcp;\n");
WRITE(p, "uniform float2 clamp_tb;\n");
WRITE(p, "uniform float3 filter_coefficients;\n");
WRITE(p, "#define samp0 samp9\n");
WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
}
else if (ApiType == APIType::Vulkan)
{
WRITE(p, "UBO_BINDING(std140, 1) uniform PSBlock {\n");
WRITE(p, " int4 position;\n");
WRITE(p, " float y_scale;\n");
@ -81,8 +72,9 @@ static void WriteHeader(char*& p, APIType ApiType)
WRITE(p, " float2 clamp_tb;\n");
WRITE(p, " float3 filter_coefficients;\n");
WRITE(p, "};\n");
WRITE(p, "VARYING_LOCATION(0) in float3 v_tex0;\n");
WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n");
}
else // D3D
{
@ -147,7 +139,7 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
else
{
// Handle D3D depth inversion.
if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
WRITE(p, "1.0 - (");
else
WRITE(p, "(");
@ -225,7 +217,9 @@ static void WriteSwizzler(char*& p, const EFBCopyParams& params, EFBCopyFormat f
else // D3D
{
WRITE(p, "void main(\n");
WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n");
WRITE(p, " in float3 v_tex0 : TEXCOORD0,\n");
WRITE(p, " in float4 rawpos : SV_Position,\n");
WRITE(p, " out float4 ocol0 : SV_Target)\n");
WRITE(p, "{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(rawpos.xy);\n");
@ -846,38 +840,65 @@ const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type
// NOTE: In these uniforms, a row refers to a row of blocks, not texels.
static const char decoding_shader_header[] = R"(
#ifdef VULKAN
#if defined(PALETTE_FORMAT_IA8) || defined(PALETTE_FORMAT_RGB565) || defined(PALETTE_FORMAT_RGB5A3)
#define HAS_PALETTE 1
#endif
layout(std140, push_constant) uniform PushConstants {
uvec2 dst_size;
uvec2 src_size;
uint src_offset;
uint src_row_stride;
uint palette_offset;
} push_constants;
#define u_dst_size (push_constants.dst_size)
#define u_src_size (push_constants.src_size)
#define u_src_offset (push_constants.src_offset)
#define u_src_row_stride (push_constants.src_row_stride)
#define u_palette_offset (push_constants.palette_offset)
#ifdef API_D3D
cbuffer UBO : register(b0) {
#else
UBO_BINDING(std140, 1) uniform UBO {
#endif
uint2 u_dst_size;
uint2 u_src_size;
uint u_src_offset;
uint u_src_row_stride;
uint u_palette_offset;
};
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
#ifdef API_D3D
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
Buffer<uint4> s_input_buffer : register(t0);
#ifdef HAS_PALETTE
Buffer<uint4> s_palette_buffer : register(t1);
#endif
RWTexture2DArray<unorm float4> output_image : register(u0);
// Helpers for reading/writing.
#define texelFetch(buffer, pos) buffer.Load(pos)
#define imageStore(image, coords, value) image[coords] = value
#define GROUP_MEMORY_BARRIER_WITH_SYNC GroupMemoryBarrierWithGroupSync();
#define GROUP_SHARED groupshared
#define DEFINE_MAIN(lx, ly) \
[numthreads(lx, ly, 1)] \
void main(uint3 gl_WorkGroupID : SV_GroupId, \
uint3 gl_LocalInvocationID : SV_GroupThreadID, \
uint3 gl_GlobalInvocationID : SV_DispatchThreadID)
uint bitfieldExtract(uint val, int off, int size)
{
// This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
// Microsoft's HLSL compiler automatically optimises this to a bitfield extract instruction.
uint mask = uint((1 << size) - 1);
return uint(val >> off) & mask;
}
#else
uniform uvec2 u_dst_size;
uniform uvec2 u_src_size;
uniform uint u_src_offset;
uniform uint u_src_row_stride;
uniform uint u_palette_offset;
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
#ifdef HAS_PALETTE
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
#endif
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
SAMPLER_BINDING(9) uniform usamplerBuffer s_input_buffer;
SAMPLER_BINDING(10) uniform usamplerBuffer s_palette_buffer;
#define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier();
#define GROUP_SHARED shared
layout(rgba8, binding = 0) uniform writeonly image2DArray output_image;
#define DEFINE_MAIN(lx, ly) \
layout(local_size_x = lx, local_size_y = ly) in; \
void main()
#endif
@ -908,10 +929,10 @@ uint Convert6To8(uint v)
return (v << 2) | (v >> 4);
}
uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords)
uint GetTiledTexelOffset(uint2 block_size, uint2 coords)
{
uvec2 block = coords / block_size;
uvec2 offset = coords % block_size;
uint2 block = coords / block_size;
uint2 offset = coords % block_size;
uint buffer_pos = u_src_offset;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * (block_size.x * block_size.y);
@ -920,16 +941,16 @@ uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords)
return buffer_pos;
}
uvec4 GetPaletteColor(uint index)
uint4 GetPaletteColor(uint index)
{
// Fetch and swap BE to LE.
uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
uvec4 color;
uint4 color;
#if defined(PALETTE_FORMAT_IA8)
uint a = bitfieldExtract(val, 8, 8);
uint i = bitfieldExtract(val, 0, 8);
color = uvec4(i, i, i, a);
color = uint4(i, i, i, a);
#elif defined(PALETTE_FORMAT_RGB565)
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
@ -953,29 +974,27 @@ uvec4 GetPaletteColor(uint index)
}
#else
// Not used.
color = uvec4(0, 0, 0, 0);
color = uint4(0, 0, 0, 0);
#endif
return color;
}
vec4 GetPaletteColorNormalized(uint index)
float4 GetPaletteColorNormalized(uint index)
{
uvec4 color = GetPaletteColor(index);
return vec4(color) / 255.0;
uint4 color = GetPaletteColor(index);
return float4(color) / 255.0;
}
)";
static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
{TextureFormat::I4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x8 blocks, 4 bits per pixel
// We need to do the tiling manually here because the texel size is smaller than
@ -996,108 +1015,98 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
else
i = Convert4To8((val & 0x0Fu));
uvec4 color = uvec4(i, i, i, i);
vec4 norm_color = vec4(color) / 255.0;
uint4 color = uint4(i, i, i, i);
float4 norm_color = float4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::IA4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i = Convert4To8((val & 0x0Fu));
uint a = Convert4To8((val >> 4));
uvec4 color = uvec4(i, i, i, a);
vec4 norm_color = vec4(color) / 255.0;
uint4 color = uint4(i, i, i, a);
float4 norm_color = float4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::I8,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
uvec4 color = uvec4(i, i, i, i);
vec4 norm_color = vec4(color) / 255.0;
uint4 color = uint4(i, i, i, i);
float4 norm_color = float4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::IA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint a = (val & 0xFFu);
uint i = (val >> 8);
uvec4 color = uvec4(i, i, i, a);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
uint4 color = uint4(i, i, i, a);
float4 norm_color = float4(color) / 255.0;
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::RGB565,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uvec4 color;
uint4 color;
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
float4 norm_color = float4(color) / 255.0;
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::RGB5A3,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uvec4 color;
uint4 color;
if ((val & 0x8000u) != 0u)
{
color.x = Convert5To8(bitfieldExtract(val, 10, 5));
@ -1113,19 +1122,17 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
color.b = Convert4To8(bitfieldExtract(val, 0, 4));
}
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
float4 norm_color = float4(color) / 255.0;
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::RGBA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
// We can't use the normal calculation function, as these are packed as the AR channels
@ -1144,18 +1151,18 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;
uvec4 color;
uint4 color;
color.a = (val1 & 0xFFu);
color.r = (val1 >> 8);
color.g = (val2 & 0xFFu);
color.b = (val2 >> 8);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
float4 norm_color = float4(color) / 255.0;
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::CMPR,
{BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
{TEXEL_BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
R"(
// In the compute version of this decoder, we flatten the blocks to a one-dimension array.
// Each group is subdivided into 16, and the first thread in each group fetches the DXT data.
@ -1167,17 +1174,15 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
#define BLOCK_SIZE (BLOCK_SIZE_X * BLOCK_SIZE_Y)
#define BLOCKS_PER_GROUP (GROUP_SIZE / BLOCK_SIZE)
layout(local_size_x = GROUP_SIZE, local_size_y = 1) in;
shared uvec2 shared_temp[BLOCKS_PER_GROUP];
uint DXTBlend(uint v1, uint v2)
{
// 3/8 blend, which is close to 1/3
return ((v1 * 3u + v2 * 5u) >> 3);
}
void main()
GROUP_SHARED uint2 shared_temp[BLOCKS_PER_GROUP];
DEFINE_MAIN(GROUP_SIZE, 8)
{
uint local_thread_id = gl_LocalInvocationID.x;
uint block_in_group = local_thread_id / BLOCK_SIZE;
@ -1188,7 +1193,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// from the block size of the overall texture (4 vs 8). We can however use a multiply and
// subtraction to avoid the modulo for calculating the block's X coordinate.
uint blocks_wide = u_src_size.x / BLOCK_SIZE_X;
uvec2 block_coords;
uint2 block_coords;
block_coords.y = block_index / blocks_wide;
block_coords.x = block_index - (block_coords.y * blocks_wide);
@ -1196,8 +1201,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
if (thread_in_block == 0u)
{
// Calculate tiled block coordinates.
uvec2 tile_block_coords = block_coords / 2u;
uvec2 subtile_block_coords = block_coords % 2u;
uint2 tile_block_coords = block_coords / 2u;
uint2 subtile_block_coords = block_coords % 2u;
uint buffer_pos = u_src_offset;
buffer_pos += tile_block_coords.y * u_src_row_stride;
buffer_pos += tile_block_coords.x * 4u;
@ -1205,16 +1210,15 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
buffer_pos += subtile_block_coords.x;
// Read the entire DXT block to shared memory.
uvec2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
shared_temp[block_in_group] = raw_data;
}
// Ensure store is completed before the remaining threads in the block continue.
memoryBarrierShared();
barrier();
GROUP_MEMORY_BARRIER_WITH_SYNC;
// Unpack colors and swap BE to LE.
uvec2 raw_data = shared_temp[block_in_group];
uint2 raw_data = shared_temp[block_in_group];
uint swapped = ((raw_data.x & 0xFF00FF00u) >> 8) | ((raw_data.x & 0x00FF00FFu) << 8);
uint c1 = swapped & 0xFFFFu;
uint c2 = swapped >> 16;
@ -1230,18 +1234,18 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Determine the four colors the block can use.
// It's quicker to just precalculate all four colors rather than branching on the index.
// NOTE: These must be masked with 0xFF. This is done at the normalization stage below.
uvec4 color0, color1, color2, color3;
color0 = uvec4(red1, green1, blue1, 255u);
color1 = uvec4(red2, green2, blue2, 255u);
uint4 color0, color1, color2, color3;
color0 = uint4(red1, green1, blue1, 255u);
color1 = uint4(red2, green2, blue2, 255u);
if (c1 > c2)
{
color2 = uvec4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u);
color3 = uvec4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u);
color2 = uint4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u);
color3 = uint4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u);
}
else
{
color2 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u);
color3 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u);
color2 = uint4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u);
color3 = uint4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u);
}
// Calculate the texel coordinates that we will write to.
@ -1257,7 +1261,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Select the un-normalized color from the precalculated color array.
// Using a switch statement here removes the need for dynamic indexing of an array.
uvec4 color;
uint4 color;
switch (index)
{
case 0u: color = color0; break;
@ -1268,19 +1272,17 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
}
// Normalize and write to the output image.
vec4 norm_color = vec4(color & 0xFFu) / 255.0;
imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color);
float4 norm_color = float4(color & 0xFFu) / 255.0;
imageStore(output_image, int3(int2(uint2(global_x, global_y)), 0), norm_color);
}
)"}},
{TextureFormat::C4,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C4)), 8, 8,
false,
{TEXEL_BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C4)),
8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x8 blocks, 4 bits per pixel
// We need to do the tiling manually here because the texel size is smaller than
@ -1296,58 +1298,52 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::C8,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C8)), 8, 8,
false,
{TEXEL_BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C8)),
8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
{TextureFormat::C14X2,
{BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8,
8, false,
{TEXEL_BUFFER_FORMAT_R16_UINT,
static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 coords = gl_GlobalInvocationID.xy;
uint2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
)"}},
// We do the inverse BT.601 conversion for YCbCr to RGB
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
{TextureFormat::XFB,
{BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false,
{TEXEL_BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
DEFINE_MAIN(8, 8)
{
uvec2 uv = gl_GlobalInvocationID.xy;
uint2 uv = gl_GlobalInvocationID.xy;
int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u));
vec4 yuyv = vec4(texelFetch(s_input_buffer, buffer_pos));
float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos));
float y = mix(yuyv.r, yuyv.b, (uv.x & 1u) == 1u);
@ -1355,33 +1351,21 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
float uComp = yuyv.g - 128.0;
float vComp = yuyv.a - 128.0;
vec4 rgb = vec4(yComp + (1.596 * vComp),
float4 rgb = float4(yComp + (1.596 * vComp),
yComp - (0.813 * vComp) - (0.391 * uComp),
yComp + (2.018 * uComp),
255.0);
vec4 rgba_norm = rgb / 255.0;
imageStore(output_image, ivec3(ivec2(uv), 0), rgba_norm);
float4 rgba_norm = rgb / 255.0;
imageStore(output_image, int3(int2(uv), 0), rgba_norm);
}
)"}}};
// Size in bytes of one texel buffer element for each buffer format.
// The table is indexed directly by the format's enum value (see GetBytesPerBufferElement),
// so the entries must stay in the same order as the enumerators named in the comments below.
static const std::array<u32, BUFFER_FORMAT_COUNT> s_buffer_bytes_per_texel = {{
1, // BUFFER_FORMAT_R8_UINT
2, // BUFFER_FORMAT_R16_UINT
8, // BUFFER_FORMAT_R32G32_UINT
4, // BUFFER_FORMAT_RGBA8_UINT
}};
// Looks up the decoding shader description registered for the given texture format.
// Returns a pointer into s_decoding_shader_info, or nullptr when the format has no entry.
const DecodingShaderInfo* GetDecodingShaderInfo(TextureFormat format)
{
  const auto entry = s_decoding_shader_info.find(format);
  if (entry == s_decoding_shader_info.end())
    return nullptr;

  return &entry->second;
}
// Returns the number of bytes occupied by a single element of the given buffer format.
// The format value is used directly as an index into s_buffer_bytes_per_texel; no range
// check is performed here.
u32 GetBytesPerBufferElement(BufferFormat buffer_format)
{
  const u32 element_size = s_buffer_bytes_per_texel[buffer_format];
  return element_size;
}
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height)
{
// Flatten to a single dimension?
@ -1419,4 +1403,126 @@ std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_form
return ss.str();
}
// Builds the source text of the pixel/fragment shader used for palette conversion.
//
// The generated shader samples the red channel of a 2D array texture, scales it by the
// `multiplier` uniform and rounds it to an integer index, fetches the 16-bit entry at
// `index + texel_buffer_offset` from a texel buffer, swaps its two bytes, and decodes it to a
// normalized float4 via the DecodePixel() variant selected by `palette_format`.
//
// palette_format: selects which DecodePixel() body is emitted (IA8, RGB565 or RGB5A3).
// api_type: APIType::D3D emits HLSL-style declarations and entry point; every other value
//           takes the GLSL-style path.
// Returns the complete shader source as a string.
std::string GeneratePaletteConversionShader(TLUTFormat palette_format, APIType api_type)
{
std::stringstream ss;
// Bit-replication helpers that expand 3/4/5/6-bit channel values to 8 bits.
ss << R"(
int Convert3To8(int v)
{
// Swizzle bits: 00000123 -> 12312312
return (v << 5) | (v << 2) | (v >> 1);
}
int Convert4To8(int v)
{
// Swizzle bits: 00001234 -> 12341234
return (v << 4) | v;
}
int Convert5To8(int v)
{
// Swizzle bits: 00012345 -> 12345123
return (v << 3) | (v >> 2);
}
int Convert6To8(int v)
{
// Swizzle bits: 00123456 -> 12345612
return (v << 2) | (v >> 4);
})";
// Emit the DecodePixel() implementation matching the palette format.
switch (palette_format)
{
case TLUTFormat::IA8:
ss << R"(
float4 DecodePixel(int val)
{
int i = val & 0xFF;
int a = val >> 8;
return float4(i, i, i, a) / 255.0;
})";
break;
case TLUTFormat::RGB565:
ss << R"(
float4 DecodePixel(int val)
{
int r, g, b, a;
r = Convert5To8((val >> 11) & 0x1f);
g = Convert6To8((val >> 5) & 0x3f);
b = Convert5To8((val) & 0x1f);
a = 0xFF;
return float4(r, g, b, a) / 255.0;
})";
break;
case TLUTFormat::RGB5A3:
ss << R"(
float4 DecodePixel(int val)
{
int r,g,b,a;
if ((val&0x8000) > 0)
{
r=Convert5To8((val>>10) & 0x1f);
g=Convert5To8((val>>5 ) & 0x1f);
b=Convert5To8((val ) & 0x1f);
a=0xFF;
}
else
{
a=Convert3To8((val>>12) & 0x7);
r=Convert4To8((val>>8 ) & 0xf);
g=Convert4To8((val>>4 ) & 0xf);
b=Convert4To8((val ) & 0xf);
}
return float4(r, g, b, a) / 255.0;
})";
break;
default:
// NOTE: generation continues after the alert, so the returned source calls DecodePixel()
// without defining it for an unrecognized format.
PanicAlert("Unknown format");
break;
}
ss << "\n";
// Resource declarations: the texel buffer holding palette entries, the source index
// texture with its sampler, and the opening of the uniform block.
if (api_type == APIType::D3D)
{
ss << "Buffer<uint> tex0 : register(t0);\n";
ss << "Texture2DArray tex1 : register(t1);\n";
ss << "SamplerState samp1 : register(s1);\n";
ss << "cbuffer PSBlock : register(b0) {\n";
}
else
{
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n";
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
}
// Uniform block members are shared by both paths.
ss << " float multiplier;\n";
ss << " int texel_buffer_offset;\n";
ss << "};\n";
// Entry point: read the index from the source texture, then fetch the raw palette entry.
if (api_type == APIType::D3D)
{
ss << "void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target) {\n";
ss << " int src = int(round(tex1.Sample(samp1, v_tex0).r * multiplier));\n";
ss << " src = int(tex0.Load(src + texel_buffer_offset).r);\n";
}
else
{
ss << "VARYING_LOCATION(0) in float3 v_tex0;\n";
ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n";
ss << "void main() {\n";
ss << " float3 coords = v_tex0;\n";
ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n";
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
}
// Swap the two bytes of the 16-bit palette entry before decoding it.
ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n";
ss << " ocol0 = DecodePixel(src);\n";
ss << "}\n";
return ss.str();
}
} // namespace TextureConversionShaderTiled

View File

@ -13,6 +13,7 @@ enum class APIType;
enum class TextureFormat;
enum class EFBCopyFormat;
enum class TLUTFormat;
enum TexelBufferFormat : u32;
struct EFBCopyParams;
namespace TextureConversionShaderTiled
@ -21,20 +22,10 @@ u16 GetEncodedSampleCount(EFBCopyFormat format);
const char* GenerateEncodingShader(const EFBCopyParams& params, APIType ApiType);
// View format of the input data to the texture decoding shader.
enum BufferFormat
{
BUFFER_FORMAT_R8_UINT,
BUFFER_FORMAT_R16_UINT,
BUFFER_FORMAT_R32G32_UINT,
BUFFER_FORMAT_RGBA8_UINT,
BUFFER_FORMAT_COUNT
};
// Information required to compile and dispatch a texture decoding shader.
struct DecodingShaderInfo
{
BufferFormat buffer_format;
TexelBufferFormat buffer_format;
u32 palette_size;
u32 group_size_x;
u32 group_size_y;
@ -46,10 +37,6 @@ struct DecodingShaderInfo
// If this format does not have a shader written for it, returns nullptr.
const DecodingShaderInfo* GetDecodingShaderInfo(TextureFormat format);
// Determine how many bytes there are in each element of the texel buffer.
// Needed for alignment and stride calculations.
u32 GetBytesPerBufferElement(BufferFormat buffer_format);
// Determine how many thread groups should be dispatched for an image of the specified width/height.
// First is the number of X groups, second is the number of Y groups, Z is always one.
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height);
@ -58,4 +45,7 @@ std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width,
std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format,
APIType api_type);
// Returns the shader source (HLSL for D3D, GLSL otherwise) of the palette conversion shader for
// the specified palette format.
std::string GeneratePaletteConversionShader(TLUTFormat palette_format, APIType api_type);
} // namespace TextureConversionShaderTiled

View File

@ -31,72 +31,99 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
return out;
}
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
static void WriteHeader(APIType api_type, ShaderCode& out)
{
const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth;
ShaderCode out;
if (api_type == APIType::OpenGL)
if (api_type == APIType::D3D)
{
out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"uniform float3 filter_coefficients;\n"
"uniform float gamma_rcp;\n"
"uniform float2 clamp_tb;\n"
"uniform float pixel_height;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
" return texture(samp9, float3(uv.x, clamp(uv.y - (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), %s));\n"
"}\n",
mono_depth ? "0.0" : "uv.z");
out.Write("#define uv0 f_uv0\n"
"in vec3 uv0;\n"
"out vec4 ocol0;\n"
"void main(){\n");
out.Write("cbuffer PSBlock : register(b0) {\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"};\n\n");
}
else if (api_type == APIType::Vulkan)
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"};\n");
}
}
// Generates the vertex shader that accompanies the pixel shader produced by this generator.
// The shader derives a screen-covering triangle from the vertex ID alone and maps its texture
// coordinates into the source rectangle given by the src_offset/src_size uniforms that
// WriteHeader() declares.
ShaderCode GenerateVertexShader(APIType api_type)
{
  ShaderCode out;
  WriteHeader(api_type, out);

  // Entry point signature; API types other than the three below emit no prologue.
  switch (api_type)
  {
  case APIType::D3D:
    out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
              " out float4 opos : SV_Position) {\n");
    break;

  case APIType::OpenGL:
  case APIType::Vulkan:
    out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n"
              "#define id gl_VertexID\n"
              "#define opos gl_Position\n"
              "void main() {\n");
    break;

  default:
    break;
  }

  // Shared body: build the unit-square coordinate from the vertex ID, turn it into a clip-space
  // position, then rescale the coordinate into the source rectangle.
  out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
  out.Write(
      " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
  out.Write(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");

  // NDC space is flipped in Vulkan
  const bool flip_y = (api_type == APIType::Vulkan);
  if (flip_y)
  {
    out.Write(" opos.y = -opos.y;\n");
  }

  out.Write("}\n");
  return out;
}
ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
{
const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth;
ShaderCode out;
WriteHeader(api_type, out);
if (api_type == APIType::D3D)
{
out.Write("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"float4 SampleEFB(float3 uv, float y_offset) {\n"
" return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), %s));\n"
"}\n\n",
mono_depth ? "0.0" : "uv.z");
out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), %s));\n"
"}\n",
mono_depth ? "0.0" : "uv.z");
out.Write("layout(location = 0) in vec3 uv0;\n"
"layout(location = 1) in vec4 col0;\n"
"layout(location = 0) out vec4 ocol0;"
"void main(){\n");
}
else if (api_type == APIType::D3D)
{
out.Write("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"uniform float3 filter_coefficients;\n"
"uniform float gamma_rcp;\n"
"uniform float2 clamp_tb;\n"
"uniform float pixel_height;\n\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
" return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), %s));\n"
"}\n",
mono_depth ? "0.0" : "uv.z");
out.Write("void main(out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0) {\n");
out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n"
"FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;"
"void main()\n{\n");
}
// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.
if (uid_data->copy_filter)
{
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
@ -105,14 +132,14 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
else
{
out.Write(
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
}
if (uid_data->is_depth_copy)
{
if (api_type == APIType::D3D || api_type == APIType::Vulkan)
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
out.Write("texcol.x = 1.0 - texcol.x;\n");
out.Write(" int depth = int(texcol.x * 16777216.0);\n"

View File

@ -28,7 +28,8 @@ struct UidData
using TCShaderUid = ShaderUid<UidData>;
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
ShaderCode GenerateVertexShader(APIType api_type);
ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data);
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half, bool copy_filter);

View File

@ -52,8 +52,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch;
const bool early_depth = uid_data->early_depth != 0;
const bool per_pixel_depth = uid_data->per_pixel_depth != 0;
const bool bounding_box =
host_config.bounding_box && g_ActiveConfig.BBoxUseFragmentShaderImplementation();
const bool bounding_box = host_config.bounding_box;
const u32 numTexgen = uid_data->num_texgens;
ShaderCode out;
@ -1058,7 +1057,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
if (host_config.fast_depth_calc)
{
if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
if (!host_config.backend_reversed_depth_range)
out.Write(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
else
out.Write(" int zCoord = int(rawpos.z * 16777216.0);\n");
@ -1113,7 +1112,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write(" // If early depth is enabled, write to zbuffer before depth textures\n");
out.Write(" // If early depth isn't enabled, we write to the zbuffer here\n");
out.Write(" int zbuffer_zCoord = bpmem_late_ztest ? zCoord : early_zCoord;\n");
if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
if (!host_config.backend_reversed_depth_range)
out.Write(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n");
else
out.Write(" depth = float(zbuffer_zCoord) / 16777216.0;\n");

View File

@ -19,6 +19,7 @@
#include "VideoCommon/DataReader.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexLoaderManager.h"
@ -131,7 +132,7 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d
auto iter = s_native_vertex_map.find(decl);
if (iter == s_native_vertex_map.end())
{
std::unique_ptr<NativeVertexFormat> fmt = g_vertex_manager->CreateNativeVertexFormat(decl);
std::unique_ptr<NativeVertexFormat> fmt = g_renderer->CreateNativeVertexFormat(decl);
auto ipair = s_native_vertex_map.emplace(decl, std::move(fmt));
iter = ipair.first;
}
@ -228,9 +229,7 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
const PortableVertexDeclaration& format = loader->m_native_vtx_decl;
std::unique_ptr<NativeVertexFormat>& native = s_native_vertex_map[format];
if (!native)
{
native = g_vertex_manager->CreateNativeVertexFormat(format);
}
native = g_renderer->CreateNativeVertexFormat(format);
loader->m_native_vertex_format = native.get();
}
state->vertex_loaders[vtx_attr_group] = loader;

View File

@ -17,8 +17,9 @@
#include "Core/ConfigManager.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/NativeVertexFormat.h"
@ -79,11 +80,15 @@ static bool AspectIs16_9(float width, float height)
}
// Constructs the vertex manager, sizing the CPU-side vertex buffer to MAXVBUFFERSIZE and the
// CPU-side index buffer to MAXIBUFFERSIZE. No other work is done here.
VertexManagerBase::VertexManagerBase()
: m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE)
{
}
VertexManagerBase::~VertexManagerBase()
VertexManagerBase::~VertexManagerBase() = default;
// Base-class initialization hook. This implementation has nothing to set up and always
// reports success.
bool VertexManagerBase::Initialize()
{
return true;
}
u32 VertexManagerBase::GetRemainingSize() const
@ -94,6 +99,10 @@ u32 VertexManagerBase::GetRemainingSize() const
DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride,
bool cullall)
{
// Flush all EFB pokes and invalidate the peek cache.
g_framebuffer_manager->InvalidatePeekCache();
g_framebuffer_manager->FlushEFBPokes();
// The SSE vertex loader can write up to 4 bytes past the end
u32 const needed_vertex_bytes = count * stride + 4;
@ -132,7 +141,18 @@ DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count,
// need to alloc new buffer
if (m_is_flushed)
{
g_vertex_manager->ResetBuffer(stride, cullall);
if (cullall)
{
// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data();
m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();
IndexGenerator::Start(m_cpu_index_buffer.data());
}
else
{
ResetBuffer(stride);
}
m_is_flushed = false;
}
@ -210,6 +230,48 @@ std::pair<size_t, size_t> VertexManagerBase::ResetFlushAspectRatioCount()
return val;
}
// Base implementation: rewinds the vertex write pointers to the start of the CPU-side vertex
// buffer and restarts index generation at the start of the CPU-side index buffer.
// vertex_stride is not used by this implementation.
void VertexManagerBase::ResetBuffer(u32 vertex_stride)
{
  // Base and current pointers both begin at the first byte of the CPU vertex buffer.
  m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data();

  // The end pointer sits one past the last byte of that buffer.
  m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();

  IndexGenerator::Start(m_cpu_index_buffer.data());
}
// Base implementation of the commit step. It uploads nothing and reports that the batch
// starts at vertex 0 and index 0.
// num_vertices, vertex_stride and num_indices are not used by this implementation.
// out_base_vertex / out_base_index: both are written unconditionally, so they must be valid
// pointers.
void VertexManagerBase::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
u32* out_base_vertex, u32* out_base_index)
{
*out_base_vertex = 0;
*out_base_index = 0;
}
// Issues the indexed draw for the current batch through the renderer.
// base_index / num_indices / base_vertex are forwarded unchanged to DrawIndexed().
void VertexManagerBase::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex)
{
  // If bounding box is enabled, we need to flush any changes first, then invalidate what we have.
  const bool bbox_in_use = ::BoundingBox::active && g_ActiveConfig.bBBoxEnable &&
                           g_ActiveConfig.backend_info.bSupportsBBox;
  if (bbox_in_use)
    g_renderer->BBoxFlush();

  g_renderer->DrawIndexed(base_index, num_indices, base_vertex);
}
// Base implementation of the uniform upload step: intentionally does nothing.
void VertexManagerBase::UploadUniforms()
{
}
// Marks the vertex, geometry and pixel shader constant sets as dirty by setting the `dirty`
// flag on each of the three shader managers.
void VertexManagerBase::InvalidateConstants()
{
VertexShaderManager::dirty = true;
GeometryShaderManager::dirty = true;
PixelShaderManager::dirty = true;
}
// Base implementation of the utility uniform upload: intentionally does nothing.
// uniforms / uniforms_size are ignored here.
void VertexManagerBase::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
}
void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_stride,
u32 num_vertices, const u16* indices, u32 num_indices,
u32* out_base_vertex, u32* out_base_index)
@ -218,7 +280,7 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s
ASSERT(m_is_flushed);
// Copy into the buffers usually used for GX drawing.
ResetBuffer(std::max(vertex_stride, 1u), false);
ResetBuffer(std::max(vertex_stride, 1u));
if (vertices)
{
const u32 copy_size = vertex_stride * num_vertices;
@ -232,11 +294,51 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s
CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index);
}
// Returns the size in bytes of one element of the given texel buffer format.
// The size is computed as two to the power of the format's numeric value, so it relies on
// the enumerators being declared in increasing element-size order.
u32 VertexManagerBase::GetTexelBufferElementSize(TexelBufferFormat buffer_format)
{
  // R8 - 1, R16 - 2, RGBA8 - 4, R32G32 - 8
  const u32 size_log2 = static_cast<u32>(buffer_format);
  return 1u << size_log2;
}
// Base implementation of the texel buffer upload (data only).
// Performs no upload, leaves *out_offset untouched and always returns false.
bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset)
{
return false;
}
// Base implementation of the texel buffer upload (data plus palette).
// Performs no upload, leaves *out_offset and *palette_offset untouched and always returns
// false.
bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset, const void* palette_data,
u32 palette_size, TexelBufferFormat palette_format,
u32* palette_offset)
{
return false;
}
void VertexManagerBase::LoadTextures()
{
BitSet32 usedtextures;
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true;
if (bpmem.genMode.numindstages > 0)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
for (unsigned int i : usedtextures)
g_texture_cache->Load(i);
g_texture_cache->BindTextures();
}
void VertexManagerBase::Flush()
{
if (m_is_flushed)
return;
m_is_flushed = true;
// loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState();
@ -280,29 +382,6 @@ void VertexManagerBase::Flush()
(bpmem.alpha_test.hex >> 16) & 0xff);
#endif
// If the primitive is marked CullAll, all we need to do is update the vertex constants and
// calculate the zfreeze reference slope.
if (!m_cull_all)
{
BitSet32 usedtextures;
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true;
if (bpmem.genMode.numindstages > 0)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
for (unsigned int i : usedtextures)
g_texture_cache->Load(i);
g_texture_cache->BindTextures();
}
// set global vertex constants
VertexShaderManager::SetConstants();
// Track some stats used elsewhere by the anamorphic widescreen heuristic.
if (!SConfig::GetInstance().bWii)
{
@ -322,6 +401,7 @@ void VertexManagerBase::Flush()
}
// Calculate ZSlope for zfreeze
VertexShaderManager::SetConstants();
if (!bpmem.genMode.zfreeze)
{
// Must be done after VertexShaderManager::SetConstants()
@ -335,20 +415,24 @@ void VertexManagerBase::Flush()
if (!m_cull_all)
{
// Update and upload constants. Note for the Vulkan backend, this must occur before the
// vertex/index buffer is committed, otherwise the data will be associated with the
// previous command buffer, instead of the one with the draw if there is an overflow.
GeometryShaderManager::SetConstants();
PixelShaderManager::SetConstants();
UploadConstants();
// Now the vertices can be flushed to the GPU.
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
const u32 num_indices = IndexGenerator::GetIndexLen();
u32 base_vertex, base_index;
CommitBuffer(IndexGenerator::GetNumVerts(),
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
&base_vertex, &base_index);
// Texture loading can cause palettes to be applied (-> uniforms -> draws).
// Palette application does not use vertices, only a full-screen quad, so this is okay.
// Same with GPU texture decoding, which uses compute shaders.
LoadTextures();
// Now we can upload uniforms, as nothing else will override them.
GeometryShaderManager::SetConstants();
PixelShaderManager::SetConstants();
UploadUniforms();
// Update the pipeline, or compile one if needed.
UpdatePipelineConfig();
UpdatePipelineObject();
@ -363,18 +447,17 @@ void VertexManagerBase::Flush()
if (PerfQueryBase::ShouldEmulate())
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
OnDraw();
}
}
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
{
ERROR_LOG(VIDEO,
"xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.",
xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
m_is_flushed = true;
m_cull_all = false;
}
}
void VertexManagerBase::DoState(PointerWrap& p)
@ -577,3 +660,109 @@ void VertexManagerBase::UpdatePipelineObject()
break;
}
}
// Counts the draw and, if this draw number was scheduled by OnEndFrame(), flushes the renderer.
void VertexManagerBase::OnDraw()
{
  ++m_draw_counter;

  // Without any scheduled kicks (no CPU access was recorded last frame), or with background
  // execution disallowed, there is nothing more to do.
  if (!m_allow_background_execution || m_scheduled_command_buffer_kicks.empty())
    return;

  // The scheduled draw counters are stored in ascending order, which is what allows a
  // binary search to be used here.
  const bool kick_scheduled_for_this_draw =
      std::binary_search(m_scheduled_command_buffer_kicks.begin(),
                         m_scheduled_command_buffer_kicks.end(), m_draw_counter);
  if (!kick_scheduled_for_this_draw)
    return;

  // Kick a command buffer on the background thread.
  g_renderer->Flush();
}
// Records the draw counter at which the CPU accessed the EFB, so that OnEndFrame() can use it
// for scheduling. Repeated accesses with no draws in between are recorded only once.
void VertexManagerBase::OnCPUEFBAccess()
{
  const bool already_recorded_for_this_draw =
      !m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter;

  if (!already_recorded_for_this_draw)
    m_cpu_accesses_this_frame.emplace_back(m_draw_counter);
}
void VertexManagerBase::OnEFBCopyToRAM()
{
// If we're not deferring, try to preempt it next frame.
if (!g_ActiveConfig.bDeferEFBCopies)
{
OnCPUEFBAccess();
return;
}
// Otherwise, only execute if we have at least 10 objects between us and the last copy.
const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter;
m_last_efb_copy_draw_counter = m_draw_counter;
if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
return;
g_renderer->Flush();
}
void VertexManagerBase::OnEndFrame()
{
m_draw_counter = 0;
m_last_efb_copy_draw_counter = 0;
m_scheduled_command_buffer_kicks.clear();
// If we have no CPU access at all, leave everything in the one command buffer for maximum
// parallelism between CPU/GPU, at the cost of slightly higher latency.
if (m_cpu_accesses_this_frame.empty())
return;
// In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway
// between the draw counters that invoked the readback, or every 250 draws, whichever is smaller.
if (g_ActiveConfig.iCommandBufferExecuteInterval > 0)
{
u32 last_draw_counter = 0;
u32 interval = static_cast<u32>(g_ActiveConfig.iCommandBufferExecuteInterval);
for (u32 draw_counter : m_cpu_accesses_this_frame)
{
// We don't want to waste executing command buffers for only a few draws, so set a minimum.
// Leave last_draw_counter as-is, so we get the correct number of draws between submissions.
u32 draw_count = draw_counter - last_draw_counter;
if (draw_count < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
continue;
if (draw_count <= interval)
{
u32 mid_point = draw_count / 2;
m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point);
}
else
{
u32 counter = interval;
while (counter < draw_count)
{
m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter);
counter += interval;
}
}
last_draw_counter = draw_counter;
}
}
#if 0
{
std::stringstream ss;
std::for_each(m_cpu_accesses_this_frame.begin(), m_cpu_accesses_this_frame.end(), [&ss](u32 idx) { ss << idx << ","; });
WARN_LOG(VIDEO, "CPU EFB accesses in last frame: %s", ss.str().c_str());
}
{
std::stringstream ss;
std::for_each(m_scheduled_command_buffer_kicks.begin(), m_scheduled_command_buffer_kicks.end(), [&ss](u32 idx) { ss << idx << ","; });
WARN_LOG(VIDEO, "Scheduled command buffer kicks: %s", ss.str().c_str());
}
#endif
m_cpu_accesses_this_frame.clear();
}

View File

@ -25,6 +25,16 @@ struct Slope
bool dirty;
};
// View format of the input data to the texture decoding shader.
// The order of the enumerators matters: VertexManagerBase::GetTexelBufferElementSize() computes
// the element size as (1 << value), so each format's value must equal log2 of its size in bytes.
enum TexelBufferFormat : u32
{
TEXEL_BUFFER_FORMAT_R8_UINT,      // value 0 -> 1 byte per element
TEXEL_BUFFER_FORMAT_R16_UINT,     // value 1 -> 2 bytes per element
TEXEL_BUFFER_FORMAT_RGBA8_UINT,   // value 2 -> 4 bytes per element
TEXEL_BUFFER_FORMAT_R32G32_UINT,  // value 3 -> 8 bytes per element
NUM_TEXEL_BUFFER_FORMATS          // count of the formats above, not a format itself
};
class VertexManagerBase
{
private:
@ -42,19 +52,24 @@ public:
// We may convert triangle-fans to triangle-lists, almost 3x as many indices.
static constexpr u32 MAXIBUFFERSIZE = MathUtil::NextPowerOf2(MAX_PRIMITIVES_PER_COMMAND * 3);
// Streaming buffer sizes.
// Texel buffer will fit the maximum size of an encoded GX texture. 1024x1024, RGBA8 = 4MB.
static constexpr u32 VERTEX_STREAM_BUFFER_SIZE = 40 * 1024 * 1024;
static constexpr u32 INDEX_STREAM_BUFFER_SIZE = 4 * 1024 * 1024;
static constexpr u32 UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr u32 TEXEL_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;
VertexManagerBase();
// needs to be virtual for DX11's dtor
virtual ~VertexManagerBase();
virtual bool Initialize();
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
void FlushData(u32 count, u32 stride);
void Flush();
virtual std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0;
void DoState(PointerWrap& p);
std::pair<size_t, size_t> ResetFlushAspectRatioCount();
@ -70,38 +85,69 @@ public:
}
// Utility pipeline drawing (e.g. EFB copies, post-processing, UI).
virtual void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) = 0;
virtual void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size);
void UploadUtilityVertices(const void* vertices, u32 vertex_stride, u32 num_vertices,
const u16* indices, u32 num_indices, u32* out_base_vertex,
u32* out_base_index);
// Determine how many bytes there are in each element of the texel buffer.
// Needed for alignment and stride calculations.
static u32 GetTexelBufferElementSize(TexelBufferFormat buffer_format);
// Texel buffer, used for palette conversion.
virtual bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset);
// The second set of parameters uploads a second blob in the same buffer, used for GPU texture
// decoding for palette textures, as both the texture data and palette must be uploaded.
virtual bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset, const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format, u32* out_palette_offset);
// CPU access tracking - call after a draw call is made.
void OnDraw();
// Call after CPU access is requested.
void OnCPUEFBAccess();
// Call after an EFB copy to RAM. Returns nothing; the renderer is flushed internally if needed.
void OnEFBCopyToRAM();
// Call at the end of a frame.
void OnEndFrame();
protected:
// Vertex buffers/index buffer creation.
virtual void CreateDeviceObjects() {}
virtual void DestroyDeviceObjects() {}
// When utility uniforms are used, the GX uniforms need to be re-written afterwards.
static void InvalidateConstants();
// Prepares the buffer for the next batch of vertices.
virtual void ResetBuffer(u32 vertex_stride, bool cull_all) = 0;
virtual void ResetBuffer(u32 vertex_stride);
// Commits/uploads the current batch of vertices.
virtual void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
u32* out_base_vertex, u32* out_base_index) = 0;
u32* out_base_vertex, u32* out_base_index);
// Uploads uniform buffers for GX draws.
virtual void UploadConstants() = 0;
virtual void UploadUniforms();
// Issues the draw call for the current batch in the backend.
virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) = 0;
virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex);
u32 GetRemainingSize() const;
static u32 GetRemainingIndices(int primitive);
void CalculateZSlope(NativeVertexFormat* format);
void LoadTextures();
u8* m_cur_buffer_pointer = nullptr;
u8* m_base_buffer_pointer = nullptr;
u8* m_end_buffer_pointer = nullptr;
u32 GetRemainingSize() const;
static u32 GetRemainingIndices(int primitive);
// Alternative buffers in CPU memory for primitives we are going to discard.
std::vector<u8> m_cpu_vertex_buffer;
std::vector<u16> m_cpu_index_buffer;
Slope m_zslope = {};
void CalculateZSlope(NativeVertexFormat* format);
VideoCommon::GXPipelineUid m_current_pipeline_config;
VideoCommon::GXUberPipelineUid m_current_uber_pipeline_config;
@ -114,12 +160,22 @@ protected:
bool m_cull_all = false;
private:
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
static constexpr u32 MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK = 10;
void UpdatePipelineConfig();
void UpdatePipelineObject();
bool m_is_flushed = true;
size_t m_flush_count_4_3 = 0;
size_t m_flush_count_anamorphic = 0;
void UpdatePipelineConfig();
void UpdatePipelineObject();
// CPU access tracking
u32 m_draw_counter = 0;
u32 m_last_efb_copy_draw_counter = 0;
std::vector<u32> m_cpu_accesses_this_frame;
std::vector<u32> m_scheduled_command_buffer_kicks;
bool m_allow_background_execution = true;
};
extern std::unique_ptr<VertexManagerBase> g_vertex_manager;

View File

@ -282,7 +282,6 @@ void VideoBackendBase::InitializeShared()
m_initialized = true;
m_invalid = false;
frameCount = 0;
CommandProcessor::Init();
Fifo::Init();

View File

@ -48,11 +48,11 @@
<ClCompile Include="BPStructs.cpp" />
<ClCompile Include="CommandProcessor.cpp" />
<ClCompile Include="CPMemory.cpp" />
<ClCompile Include="Debugger.cpp" />
<ClCompile Include="DriverDetails.cpp" />
<ClCompile Include="Fifo.cpp" />
<ClCompile Include="FPSCounter.cpp" />
<ClCompile Include="FramebufferManagerBase.cpp" />
<ClCompile Include="FramebufferManager.cpp" />
<ClCompile Include="FramebufferShaderGen.cpp" />
<ClCompile Include="HiresTextures.cpp" />
<ClCompile Include="HiresTextures_DDSLoader.cpp" />
<ClCompile Include="ImageWrite.cpp" />
@ -114,11 +114,11 @@
<ClInclude Include="CommandProcessor.h" />
<ClInclude Include="CPMemory.h" />
<ClInclude Include="DataReader.h" />
<ClInclude Include="Debugger.h" />
<ClInclude Include="DriverDetails.h" />
<ClInclude Include="Fifo.h" />
<ClInclude Include="FPSCounter.h" />
<ClInclude Include="FramebufferManagerBase.h" />
<ClInclude Include="FramebufferManager.h" />
<ClInclude Include="FramebufferShaderGen.h" />
<ClInclude Include="GXPipelineTypes.h" />
<ClInclude Include="ShaderCache.h" />
<ClInclude Include="UberShaderCommon.h" />

View File

@ -29,12 +29,6 @@
<ClCompile Include="PixelEngine.cpp" />
<ClCompile Include="VideoBackendBase.cpp" />
<ClCompile Include="VideoConfig.cpp" />
<ClCompile Include="Debugger.cpp">
<Filter>Base</Filter>
</ClCompile>
<ClCompile Include="FramebufferManagerBase.cpp">
<Filter>Base</Filter>
</ClCompile>
<ClCompile Include="PerfQueryBase.cpp">
<Filter>Base</Filter>
</ClCompile>
@ -197,6 +191,12 @@
<ClCompile Include="ShaderCache.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="FramebufferShaderGen.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="FramebufferManager.cpp">
<Filter>Base</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="CommandProcessor.h" />
@ -206,12 +206,6 @@
<ClInclude Include="VideoBackendBase.h" />
<ClInclude Include="VideoCommon.h" />
<ClInclude Include="VideoConfig.h" />
<ClInclude Include="Debugger.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="FramebufferManagerBase.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="PerfQueryBase.h">
<Filter>Base</Filter>
</ClInclude>
@ -374,7 +368,6 @@
<ClInclude Include="AbstractPipeline.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="TextureConverterShaderGen.h" />
<ClInclude Include="AbstractFramebuffer.h">
<Filter>Base</Filter>
</ClInclude>
@ -384,6 +377,15 @@
<ClInclude Include="ShaderCache.h">
<Filter>Shader Generators</Filter>
</ClInclude>
<ClInclude Include="FramebufferShaderGen.h">
<Filter>Shader Generators</Filter>
</ClInclude>
<ClInclude Include="FramebufferManager.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="TextureConverterShaderGen.h">
<Filter>Shader Generators</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="CMakeLists.txt" />

View File

@ -149,8 +149,6 @@ void VideoConfig::Refresh()
bEFBAccessEnable = Config::Get(Config::GFX_HACK_EFB_ACCESS_ENABLE);
bBBoxEnable = Config::Get(Config::GFX_HACK_BBOX_ENABLE);
bBBoxPreferStencilImplementation =
Config::Get(Config::GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION);
bForceProgressive = Config::Get(Config::GFX_HACK_FORCE_PROGRESSIVE);
bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM);
bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM);

View File

@ -114,7 +114,6 @@ struct VideoConfig final
bool bEFBAccessEnable;
bool bPerfQueriesEnable;
bool bBBoxEnable;
bool bBBoxPreferStencilImplementation; // OpenGL-only, to see how slow it is compared to SSBOs
bool bForceProgressive;
bool bEFBEmulateFormatChanges;
@ -186,6 +185,7 @@ struct VideoConfig final
std::string AdapterName; // for OpenGL
u32 MaxTextureSize;
bool bUsesLowerLeftOrigin;
bool bSupportsExclusiveFullscreen;
bool bSupportsDualSourceBlend;
@ -215,6 +215,7 @@ struct VideoConfig final
bool bSupportsBPTCTextures;
bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES
bool bSupportsBackgroundCompiling;
bool bSupportsLargePoints;
} backend_info;
// Utility
@ -223,12 +224,6 @@ struct VideoConfig final
{
return backend_info.bSupportsExclusiveFullscreen && !bBorderlessFullscreen;
}
bool BBoxUseFragmentShaderImplementation() const
{
if (backend_info.api_type == APIType::OpenGL && bBBoxPreferStencilImplementation)
return false;
return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics;
}
bool UseGPUTextureDecoding() const
{
return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;