diff --git a/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp b/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp index 80670b981d..f326e46365 100644 --- a/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp +++ b/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp @@ -1870,6 +1870,9 @@ const GLFunc gl_function_array[] = { GLFUNC_REQUIRES(glDispatchCompute, "GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"), GLFUNC_REQUIRES(glDispatchComputeIndirect, "GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"), + + // ARB_get_texture_sub_image + GLFUNC_REQUIRES(glGetTextureSubImage, "GL_ARB_get_texture_sub_image !VERSION_4_5"), }; namespace GLExtensions diff --git a/Source/Core/VideoBackends/D3D/DXTexture.cpp b/Source/Core/VideoBackends/D3D/DXTexture.cpp index 30f664b5b4..92d3d112e9 100644 --- a/Source/Core/VideoBackends/D3D/DXTexture.cpp +++ b/Source/Core/VideoBackends/D3D/DXTexture.cpp @@ -217,8 +217,140 @@ void DXTexture::CopyRectangleFromTexture(const AbstractTexture* source, void DXTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) { - size_t src_pitch = CalculateHostTextureLevelPitch(m_config.format, row_length); + size_t src_pitch = CalculateStrideForFormat(m_config.format, row_length); D3D::context->UpdateSubresource(m_texture->GetTex(), level, nullptr, buffer, static_cast(src_pitch), 0); } + +DXStagingTexture::DXStagingTexture(StagingTextureType type, const TextureConfig& config, + ID3D11Texture2D* tex) + : AbstractStagingTexture(type, config), m_tex(tex) +{ +} + +DXStagingTexture::~DXStagingTexture() +{ + if (IsMapped()) + DXStagingTexture::Unmap(); + SAFE_RELEASE(m_tex); +} + +std::unique_ptr DXStagingTexture::Create(StagingTextureType type, + const TextureConfig& config) +{ + D3D11_USAGE usage; + UINT cpu_flags; + if (type == StagingTextureType::Readback) + { + usage = D3D11_USAGE_STAGING; + cpu_flags = D3D11_CPU_ACCESS_READ; + } + else if (type == StagingTextureType::Upload) + { + usage = 
D3D11_USAGE_DYNAMIC; + cpu_flags = D3D11_CPU_ACCESS_WRITE; + } + else + { + usage = D3D11_USAGE_STAGING; + cpu_flags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } + + CD3D11_TEXTURE2D_DESC desc(GetDXGIFormatForHostFormat(config.format), config.width, config.height, + 1, 1, 0, usage, cpu_flags); + + ID3D11Texture2D* texture; + HRESULT hr = D3D::device->CreateTexture2D(&desc, nullptr, &texture); + CHECK(SUCCEEDED(hr), "Create staging texture"); + if (FAILED(hr)) + return nullptr; + + return std::unique_ptr(new DXStagingTexture(type, config, texture)); +} + +void DXStagingTexture::CopyFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect) +{ + _assert_(m_type == StagingTextureType::Readback); + _assert_(src_rect.GetWidth() == dst_rect.GetWidth() && + src_rect.GetHeight() == dst_rect.GetHeight()); + _assert_(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetConfig().height); + _assert_(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); + + if (IsMapped()) + DXStagingTexture::Unmap(); + + CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); + D3D::context->CopySubresourceRegion( + m_tex, 0, static_cast(dst_rect.left), static_cast(dst_rect.top), 0, + static_cast(src)->GetRawTexIdentifier()->GetTex(), + D3D11CalcSubresource(src_level, src_layer, src->GetConfig().levels), &src_box); + + m_needs_flush = true; +} + +void DXStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) +{ + _assert_(m_type == StagingTextureType::Upload); + _assert_(src_rect.GetWidth() == dst_rect.GetWidth() && + src_rect.GetHeight() == dst_rect.GetHeight()); + _assert_(src_rect.left 
>= 0 && static_cast(src_rect.right) <= m_config.width && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= m_config.height); + _assert_(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); + + if (IsMapped()) + DXStagingTexture::Unmap(); + + CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); + D3D::context->CopySubresourceRegion( + static_cast(dst)->GetRawTexIdentifier()->GetTex(), + D3D11CalcSubresource(dst_level, dst_layer, dst->GetConfig().levels), + static_cast(dst_rect.left), static_cast(dst_rect.top), 0, m_tex, 0, &src_box); +} + +bool DXStagingTexture::Map() +{ + if (m_map_pointer) + return true; + + D3D11_MAP map_type; + if (m_type == StagingTextureType::Readback) + map_type = D3D11_MAP_READ; + else if (m_type == StagingTextureType::Upload) + map_type = D3D11_MAP_WRITE; + else + map_type = D3D11_MAP_READ_WRITE; + + D3D11_MAPPED_SUBRESOURCE sr; + HRESULT hr = D3D::context->Map(m_tex, 0, map_type, 0, &sr); + CHECK(SUCCEEDED(hr), "Map readback texture"); + if (FAILED(hr)) + return false; + + m_map_pointer = reinterpret_cast(sr.pData); + m_map_stride = sr.RowPitch; + return true; +} + +void DXStagingTexture::Unmap() +{ + if (!m_map_pointer) + return; + + D3D::context->Unmap(m_tex, 0); + m_map_pointer = nullptr; +} + +void DXStagingTexture::Flush() +{ + // Flushing is handled by the API. 
+ m_needs_flush = false; +} + } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXTexture.h b/Source/Core/VideoBackends/D3D/DXTexture.h index 8dfffd38d5..4403eb2d43 100644 --- a/Source/Core/VideoBackends/D3D/DXTexture.h +++ b/Source/Core/VideoBackends/D3D/DXTexture.h @@ -6,6 +6,7 @@ #include "Common/CommonTypes.h" +#include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" class D3DTexture2D; @@ -38,4 +39,30 @@ private: ID3D11Texture2D* m_staging_texture = nullptr; }; +class DXStagingTexture final : public AbstractStagingTexture +{ +public: + DXStagingTexture() = delete; + ~DXStagingTexture(); + + void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) override; + void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) override; + + bool Map() override; + void Unmap() override; + void Flush() override; + + static std::unique_ptr Create(StagingTextureType type, + const TextureConfig& config); + +private: + DXStagingTexture(StagingTextureType type, const TextureConfig& config, ID3D11Texture2D* tex); + + ID3D11Texture2D* m_tex = nullptr; +}; + } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 0bddc4cd40..a7518cdf64 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -216,6 +216,12 @@ std::unique_ptr Renderer::CreateTexture(const TextureConfig& co return std::make_unique(config); } +std::unique_ptr Renderer::CreateStagingTexture(StagingTextureType type, + const TextureConfig& config) +{ + return DXStagingTexture::Create(type, config); +} + void Renderer::RenderText(const std::string& text, int left, int top, u32 color) { D3D::DrawTextScaled(static_cast(left + 1), static_cast(top + 1), 20.f, 0.0f, diff --git 
a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index 527fd1f4fc..4668d54a92 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -23,6 +23,8 @@ public: StateCache& GetStateCache() { return m_state_cache; } std::unique_ptr CreateTexture(const TextureConfig& config) override; + std::unique_ptr + CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; void SetBlendingState(const BlendingState& state) override; void SetScissorRect(const EFBRectangle& rc) override; diff --git a/Source/Core/VideoBackends/Null/NullTexture.cpp b/Source/Core/VideoBackends/Null/NullTexture.cpp index 7d852f7ddf..b5363d3918 100644 --- a/Source/Core/VideoBackends/Null/NullTexture.cpp +++ b/Source/Core/VideoBackends/Null/NullTexture.cpp @@ -25,4 +25,43 @@ void NullTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u { } +NullStagingTexture::NullStagingTexture(StagingTextureType type, const TextureConfig& config) + : AbstractStagingTexture(type, config) +{ + m_texture_buf.resize(m_texel_size * config.width * config.height); + m_map_pointer = reinterpret_cast(m_texture_buf.data()); + m_map_stride = m_texel_size * config.width; +} + +NullStagingTexture::~NullStagingTexture() = default; + +void NullStagingTexture::CopyFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect) +{ + m_needs_flush = true; +} + +void NullStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, + AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) +{ + m_needs_flush = true; +} + +bool NullStagingTexture::Map() +{ + return true; +} + +void NullStagingTexture::Unmap() +{ +} + +void NullStagingTexture::Flush() +{ + m_needs_flush = false; +} + } // namespace Null diff --git a/Source/Core/VideoBackends/Null/NullTexture.h 
b/Source/Core/VideoBackends/Null/NullTexture.h index 65f4252050..81227e162f 100644 --- a/Source/Core/VideoBackends/Null/NullTexture.h +++ b/Source/Core/VideoBackends/Null/NullTexture.h @@ -4,8 +4,11 @@ #pragma once +#include + #include "Common/CommonTypes.h" +#include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" namespace Null @@ -25,4 +28,25 @@ public: size_t buffer_size) override; }; +class NullStagingTexture final : public AbstractStagingTexture +{ +public: + explicit NullStagingTexture(StagingTextureType type, const TextureConfig& config); + ~NullStagingTexture(); + + void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) override; + void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) override; + + bool Map() override; + void Unmap() override; + void Flush() override; + +private: + std::vector m_texture_buf; +}; + } // namespace Null diff --git a/Source/Core/VideoBackends/Null/Render.cpp b/Source/Core/VideoBackends/Null/Render.cpp index 6b045a0c6a..c589c8278f 100644 --- a/Source/Core/VideoBackends/Null/Render.cpp +++ b/Source/Core/VideoBackends/Null/Render.cpp @@ -27,6 +27,12 @@ std::unique_ptr Renderer::CreateTexture(const TextureConfig& co return std::make_unique(config); } +std::unique_ptr Renderer::CreateStagingTexture(StagingTextureType type, + const TextureConfig& config) +{ + return std::make_unique(type, config); +} + void Renderer::RenderText(const std::string& text, int left, int top, u32 color) { NOTICE_LOG(VIDEO, "RenderText: %s", text.c_str()); diff --git a/Source/Core/VideoBackends/Null/Render.h b/Source/Core/VideoBackends/Null/Render.h index 9ba2ef575b..32b1d560e5 100644 --- a/Source/Core/VideoBackends/Null/Render.h +++ b/Source/Core/VideoBackends/Null/Render.h @@ -15,6 +15,8 @@ public: ~Renderer() override; 
std::unique_ptr CreateTexture(const TextureConfig& config) override; + std::unique_ptr + CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; void RenderText(const std::string& pstr, int left, int top, u32 color) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override { return 0; } diff --git a/Source/Core/VideoBackends/OGL/OGLTexture.cpp b/Source/Core/VideoBackends/OGL/OGLTexture.cpp index 272a879090..f6390381c6 100644 --- a/Source/Core/VideoBackends/OGL/OGLTexture.cpp +++ b/Source/Core/VideoBackends/OGL/OGLTexture.cpp @@ -37,7 +37,7 @@ GLenum GetGLInternalFormatForTextureFormat(AbstractTextureFormat format, bool st case AbstractTextureFormat::RGBA8: return storage ? GL_RGBA8 : GL_RGBA; case AbstractTextureFormat::BGRA8: - return GL_BGRA; + return storage ? GL_RGBA8 : GL_BGRA; default: PanicAlert("Unhandled texture format."); return storage ? GL_RGBA8 : GL_RGBA; @@ -70,6 +70,15 @@ GLenum GetGLTypeForTextureFormat(AbstractTextureFormat format) return GL_UNSIGNED_BYTE; } } + +bool UsePersistentStagingBuffers() +{ + // We require ARB_buffer_storage to create the persistent mapped buffer, + // ARB_shader_image_load_store for glMemoryBarrier, and ARB_sync to ensure + // the GPU has finished the copy before reading the buffer from the CPU. + return g_ogl_config.bSupportsGLBufferStorage && g_ogl_config.bSupportsImageLoadStore && + g_ogl_config.bSupportsGLSync; +} } // Anonymous namespace OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config) @@ -91,7 +100,7 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co if (m_config.rendertarget) { // We can't render to compressed formats. 
-    _assert_(!IsCompressedHostTextureFormat(m_config.format));
+    _assert_(!IsCompressedFormat(m_config.format));
 
     if (!g_ogl_config.bSupportsTextureStorage)
     {
@@ -106,6 +115,10 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co
     FramebufferManager::SetFramebuffer(m_framebuffer);
     FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                                            GL_TEXTURE_2D_ARRAY, m_texId, 0);
+
+    // We broke the framebuffer binding here, and need to restore it, as the CreateTexture
+    // method is in the base renderer class and can be called by VideoCommon.
+    FramebufferManager::SetFramebuffer(0);
   }
 
   SetStage();
@@ -275,7 +288,7 @@ void OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8
     glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length);
 
   GLenum gl_internal_format = GetGLInternalFormatForTextureFormat(m_config.format, false);
-  if (IsCompressedHostTextureFormat(m_config.format))
+  if (IsCompressedFormat(m_config.format))
   {
     if (g_ogl_config.bSupportsTextureStorage)
     {
@@ -321,4 +334,272 @@ void OGLTexture::SetStage()
   glActiveTexture(GL_TEXTURE0 + s_ActiveTexture);
 }
 
+OGLStagingTexture::OGLStagingTexture(StagingTextureType type, const TextureConfig& config,
+                                     GLenum target, GLuint buffer_name, size_t buffer_size,
+                                     char* map_ptr, size_t map_stride)
+    : AbstractStagingTexture(type, config), m_target(target), m_buffer_name(buffer_name),
+      m_buffer_size(buffer_size)
+{
+  m_map_pointer = map_ptr;
+  m_map_stride = map_stride;
+}
+
+OGLStagingTexture::~OGLStagingTexture()
+{
+  if (m_fence != 0)
+    glDeleteSync(m_fence);
+  if (m_map_pointer)
+  {
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_name);
+    glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+  }
+  if (m_buffer_name != 0)
+    glDeleteBuffers(1, &m_buffer_name);
+}
+
+// Creates the pixel buffer object backing a staging texture. With persistent staging buffers the
+// buffer is mapped once here; otherwise the map pointer starts out null and Map() maps on demand.
+std::unique_ptr<OGLStagingTexture> OGLStagingTexture::Create(StagingTextureType type,
+                                                             const TextureConfig& config)
+{
+  size_t stride = config.GetStride();
+  size_t buffer_size = stride * config.height;
+  GLenum target =
+      type == StagingTextureType::Readback ? GL_PIXEL_PACK_BUFFER : GL_PIXEL_UNPACK_BUFFER;
+  GLuint buffer;
+  glGenBuffers(1, &buffer);
+  glBindBuffer(target, buffer);
+
+  // Prefer using buffer_storage where possible. This allows us to skip the map/unmap steps.
+  char* buffer_ptr;
+  if (UsePersistentStagingBuffers())
+  {
+    GLenum buffer_flags;
+    GLenum map_flags;
+    if (type == StagingTextureType::Readback)
+    {
+      buffer_flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT;
+      map_flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT;
+    }
+    else if (type == StagingTextureType::Upload)
+    {
+      buffer_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
+      map_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
+    }
+    else
+    {
+      buffer_flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
+      map_flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
+    }
+
+    glBufferStorage(target, buffer_size, nullptr, buffer_flags);
+    // Map through the same target the buffer was bound to above.
+    buffer_ptr = reinterpret_cast<char*>(glMapBufferRange(target, 0, buffer_size, map_flags));
+    _assert_(buffer_ptr != nullptr);
+  }
+  else
+  {
+    // Otherwise, fallback to mapping the buffer each time.
+    glBufferData(target, buffer_size, nullptr,
+                 type == StagingTextureType::Readback ? GL_STREAM_READ : GL_STREAM_DRAW);
+    buffer_ptr = nullptr;
+  }
+  glBindBuffer(target, 0);
+
+  return std::unique_ptr<OGLStagingTexture>(
+      new OGLStagingTexture(type, config, target, buffer, buffer_size, buffer_ptr, stride));
+}
+
+// Queues a copy of src_rect from src (layer/level) into dst_rect of the staging buffer.
+void OGLStagingTexture::CopyFromTexture(const AbstractTexture* src,
+                                        const MathUtil::Rectangle<int>& src_rect, u32 src_layer,
+                                        u32 src_level, const MathUtil::Rectangle<int>& dst_rect)
+{
+  _assert_(m_type == StagingTextureType::Readback);
+  _assert_(src_rect.GetWidth() == dst_rect.GetWidth() &&
+           src_rect.GetHeight() == dst_rect.GetHeight());
+  _assert_(src_rect.left >= 0 && static_cast<u32>(src_rect.right) <= src->GetConfig().width &&
+           src_rect.top >= 0 && static_cast<u32>(src_rect.bottom) <= src->GetConfig().height);
+  _assert_(dst_rect.left >= 0 && static_cast<u32>(dst_rect.right) <= m_config.width &&
+           dst_rect.top >= 0 && static_cast<u32>(dst_rect.bottom) <= m_config.height);
+
+  // Unmap the buffer before writing when not using persistent mappings.
+  if (!UsePersistentStagingBuffers())
+    OGLStagingTexture::Unmap();
+
+  // Copy from the texture object to the staging buffer.
+  glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_name);
+  glPixelStorei(GL_PACK_ROW_LENGTH, m_config.width);
+
+  const OGLTexture* gltex = static_cast<const OGLTexture*>(src);
+  size_t dst_offset = dst_rect.top * m_config.GetStride() + dst_rect.left * m_texel_size;
+
+  // If we don't have a FBO associated with this texture, we need to use a slow path.
+  if (gltex->GetFramebuffer() != 0 && src_layer == 0 && src_level == 0)
+  {
+    // This texture has a framebuffer, so we can use glReadPixels().
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, gltex->GetFramebuffer());
+    glReadPixels(src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(),
+                 GetGLFormatForTextureFormat(m_config.format),
+                 GetGLTypeForTextureFormat(m_config.format), reinterpret_cast<void*>(dst_offset));
+
+    // Reset both read/draw framebuffers.
+    glBindFramebuffer(GL_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer());
+  }
+  else
+  {
+    glActiveTexture(GL_TEXTURE9);
+    glBindTexture(GL_TEXTURE_2D_ARRAY, gltex->GetRawTexIdentifier());
+    if (g_ogl_config.bSupportsTextureSubImage)
+    {
+      // glGetTextureSubImage() takes the texture object name, not a binding target.
+      glGetTextureSubImage(
+          gltex->GetRawTexIdentifier(), src_level, src_rect.left, src_rect.top, src_layer,
+          src_rect.GetWidth(), src_rect.GetHeight(), 1,
+          GetGLFormatForTextureFormat(m_config.format), GetGLTypeForTextureFormat(m_config.format),
+          static_cast<GLsizei>(m_buffer_size - dst_offset), reinterpret_cast<void*>(dst_offset));
+    }
+    else
+    {
+      // TODO: Investigate whether it's faster to use glReadPixels() with a framebuffer, since we're
+      // copying the whole texture, which may waste bandwidth. So we're trading CPU work in creating
+      // the framebuffer for GPU work in copying potentially redundant texels.
+      // NOTE(review): this ignores src_rect, src_layer and dst_offset; the whole level is
+      // written at buffer offset 0. Confirm callers only need full-texture copies here.
+      glGetTexImage(GL_TEXTURE_2D_ARRAY, src_level, GetGLFormatForTextureFormat(m_config.format),
+                    GetGLTypeForTextureFormat(m_config.format), nullptr);
+    }
+
+    OGLTexture::SetStage();
+  }
+
+  glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+  glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+  // If we support buffer storage, create a fence for synchronization.
+  if (UsePersistentStagingBuffers())
+  {
+    if (m_fence != 0)
+      glDeleteSync(m_fence);
+
+    glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+    m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+  }
+
+  m_needs_flush = true;
+}
+
+// Queues a copy of src_rect from the staging buffer into dst_rect of dst (layer/level).
+void OGLStagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect,
+                                      AbstractTexture* dst,
+                                      const MathUtil::Rectangle<int>& dst_rect, u32 dst_layer,
+                                      u32 dst_level)
+{
+  _assert_(m_type == StagingTextureType::Upload);
+  _assert_(src_rect.GetWidth() == dst_rect.GetWidth() &&
+           src_rect.GetHeight() == dst_rect.GetHeight());
+  _assert_(src_rect.left >= 0 && static_cast<u32>(src_rect.right) <= m_config.width &&
+           src_rect.top >= 0 && static_cast<u32>(src_rect.bottom) <= m_config.height);
+  _assert_(dst_rect.left >= 0 && static_cast<u32>(dst_rect.right) <= dst->GetConfig().width &&
+           dst_rect.top >= 0 && static_cast<u32>(dst_rect.bottom) <= dst->GetConfig().height);
+
+  size_t src_offset = src_rect.top * m_config.GetStride() + src_rect.left * m_texel_size;
+  size_t copy_size = src_rect.GetHeight() * m_config.GetStride();
+
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer_name);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, m_config.width);
+
+  if (!UsePersistentStagingBuffers())
+  {
+    // Unmap the buffer before writing when not using persistent mappings.
+    if (m_map_pointer)
+    {
+      glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+      m_map_pointer = nullptr;
+    }
+  }
+  else
+  {
+    // Since we're not using coherent mapping, we must flush the range explicitly.
+    if (m_type == StagingTextureType::Upload)
+      glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, src_offset, copy_size);
+    glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+  }
+
+  // Copy from the staging buffer to the texture object.
+  glActiveTexture(GL_TEXTURE9);
+  glBindTexture(GL_TEXTURE_2D_ARRAY, static_cast<const OGLTexture*>(dst)->GetRawTexIdentifier());
+  glTexSubImage3D(GL_TEXTURE_2D_ARRAY, dst_level, dst_rect.left, dst_rect.top, dst_layer,
+                  dst_rect.GetWidth(), dst_rect.GetHeight(), 1,
+                  GetGLFormatForTextureFormat(m_config.format),
+                  GetGLTypeForTextureFormat(m_config.format), reinterpret_cast<void*>(src_offset));
+  OGLTexture::SetStage();
+
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+  // If we support buffer storage, create a fence for synchronization.
+  if (UsePersistentStagingBuffers())
+  {
+    if (m_fence != 0)
+      glDeleteSync(m_fence);
+
+    m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+  }
+
+  m_needs_flush = true;
+}
+
+// Waits on the fence created by the last copy (if any) and clears the needs-flush flag.
+void OGLStagingTexture::Flush()
+{
+  // No-op when not using buffer storage, as the transfers happen on Map().
+  // m_fence will always be zero in this case.
+  if (m_fence == 0)
+  {
+    m_needs_flush = false;
+    return;
+  }
+
+  glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
+  glDeleteSync(m_fence);
+  m_fence = 0;
+  m_needs_flush = false;
+}
+
+// Maps the buffer for CPU access. Returns true on success or if it is already mapped.
+bool OGLStagingTexture::Map()
+{
+  if (m_map_pointer)
+    return true;
+
+  // Slow path, map the texture, unmap it later.
+  GLenum flags;
+  if (m_type == StagingTextureType::Readback)
+    flags = GL_MAP_READ_BIT;
+  else if (m_type == StagingTextureType::Upload)
+    flags = GL_MAP_WRITE_BIT;
+  else
+    flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT;
+  glBindBuffer(m_target, m_buffer_name);
+  m_map_pointer = reinterpret_cast<char*>(glMapBufferRange(m_target, 0, m_buffer_size, flags));
+  // Don't leave the PBO bound; it would redirect later pixel transfers that use client memory.
+  glBindBuffer(m_target, 0);
+
+  return m_map_pointer != nullptr;
+}
+
+void OGLStagingTexture::Unmap()
+{
+  // No-op with persistent mapped buffers.
+  if (!m_map_pointer || UsePersistentStagingBuffers())
+    return;
+
+  glBindBuffer(m_target, m_buffer_name);
+  glUnmapBuffer(m_target);
+  glBindBuffer(m_target, 0);
+  m_map_pointer = nullptr;
+}
+
 }  // namespace OGL
diff --git a/Source/Core/VideoBackends/OGL/OGLTexture.h b/Source/Core/VideoBackends/OGL/OGLTexture.h
index 91cb09bbca..064006fead 100644
--- a/Source/Core/VideoBackends/OGL/OGLTexture.h
+++ b/Source/Core/VideoBackends/OGL/OGLTexture.h
@@ -8,6 +8,7 @@
 
 #include "Common/GL/GLUtil.h"
 
+#include "VideoCommon/AbstractStagingTexture.h"
 #include "VideoCommon/AbstractTexture.h"
 
 namespace OGL
@@ -43,4 +44,35 @@ private:
   std::vector<u8> m_staging_data;
 };
 
+class OGLStagingTexture final : public AbstractStagingTexture
+{
+public:
+  OGLStagingTexture() = delete;
+  ~OGLStagingTexture();
+
+  void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& src_rect,
+                       u32 src_layer, u32 src_level,
+                       const MathUtil::Rectangle<int>& dst_rect) override;
+  void CopyToTexture(const MathUtil::Rectangle<int>& src_rect, AbstractTexture* dst,
+                     const MathUtil::Rectangle<int>& dst_rect, u32 dst_layer,
+                     u32 dst_level) override;
+
+  bool Map() override;
+  void Unmap() override;
+  void Flush() override;
+
+  static std::unique_ptr<OGLStagingTexture> Create(StagingTextureType type,
+                                                   const TextureConfig& config);
+
+private:
+  OGLStagingTexture(StagingTextureType type, const TextureConfig& config, GLenum target,
+                    GLuint buffer_name, size_t buffer_size, char* map_ptr, size_t map_stride);
+
+private:
+  GLenum m_target;
+  GLuint m_buffer_name;
+  size_t m_buffer_size;
+  GLsync m_fence = 0;
+};
+
 }  // namespace OGL
diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp
index fff40f7abf..780f4fa199 100644
--- a/Source/Core/VideoBackends/OGL/Render.cpp
+++ b/Source/Core/VideoBackends/OGL/Render.cpp
@@ -458,7 +458,7 @@ Renderer::Renderer()
        GLExtensions::Supports("GL_EXT_copy_image") ||
        GLExtensions::Supports("GL_OES_copy_image")) &&
       !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_COPYIMAGE);
-  g_ogl_config.bSupportTextureSubImage = GLExtensions::Supports("ARB_get_texture_sub_image");
+  g_ogl_config.bSupportsTextureSubImage = GLExtensions::Supports("GL_ARB_get_texture_sub_image");
 
   // Desktop OpenGL supports the binding layout if it supports 420pack
   // OpenGL ES 3.1 supports it implicitly without an extension
@@ -623,6 +623,8 @@ Renderer::Renderer()
 
       // Compute shaders are core in GL4.3.
       g_Config.backend_info.bSupportsComputeShaders = true;
+      if (GLExtensions::Version() >= 450)
+        g_ogl_config.bSupportsTextureSubImage = true;
     }
     else
     {
@@ -819,6 +821,12 @@ std::unique_ptr<AbstractTexture> Renderer::CreateTexture(const TextureConfig& co
   return std::make_unique<OGLTexture>(config);
 }
 
+std::unique_ptr<AbstractStagingTexture> Renderer::CreateStagingTexture(StagingTextureType type,
+                                                                       const TextureConfig& config)
+{
+  return OGLStagingTexture::Create(type, config);
+}
+
 void Renderer::RenderText(const std::string& text, int left, int top, u32 color)
 {
   u32 backbuffer_width = std::max(GLInterface->GetBackBufferWidth(), 1u);
diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h
index 2e60df3f6d..e787828d53 100644
--- a/Source/Core/VideoBackends/OGL/Render.h
+++ b/Source/Core/VideoBackends/OGL/Render.h
@@ -58,7 +58,7 @@ struct VideoConfig
   bool bSupportsImageLoadStore;
   bool bSupportsAniso;
   bool bSupportsBitfield;
-  bool bSupportTextureSubImage;
+  bool bSupportsTextureSubImage;
 
   const char* gl_vendor;
   const char* gl_renderer;
@@ -78,6 +78,8 @@ public:
   void Shutdown();
 
   std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override;
+  std::unique_ptr<AbstractStagingTexture>
+  CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override;
 
   void SetBlendingState(const BlendingState& state) override;
   void SetScissorRect(const EFBRectangle& rc) override;
diff --git
a/Source/Core/VideoBackends/Software/SWRenderer.cpp +++ b/Source/Core/VideoBackends/Software/SWRenderer.cpp @@ -40,6 +40,12 @@ std::unique_ptr SWRenderer::CreateTexture(const TextureConfig& return std::make_unique(config); } +std::unique_ptr +SWRenderer::CreateStagingTexture(StagingTextureType type, const TextureConfig& config) +{ + return std::make_unique(type, config); +} + void SWRenderer::RenderText(const std::string& pstr, int left, int top, u32 color) { SWOGLWindow::s_instance->PrintText(pstr, left, top, color); diff --git a/Source/Core/VideoBackends/Software/SWRenderer.h b/Source/Core/VideoBackends/Software/SWRenderer.h index ac8764e0c7..312acd1991 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.h +++ b/Source/Core/VideoBackends/Software/SWRenderer.h @@ -17,6 +17,8 @@ public: static void Shutdown(); std::unique_ptr CreateTexture(const TextureConfig& config) override; + std::unique_ptr + CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; void RenderText(const std::string& pstr, int left, int top, u32 color) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; diff --git a/Source/Core/VideoBackends/Software/SWTexture.cpp b/Source/Core/VideoBackends/Software/SWTexture.cpp index aa279b520b..184a223392 100644 --- a/Source/Core/VideoBackends/Software/SWTexture.cpp +++ b/Source/Core/VideoBackends/Software/SWTexture.cpp @@ -5,6 +5,7 @@ #include "VideoBackends/Software/SWTexture.h" #include +#include "Common/Assert.h" #include "VideoBackends/Software/CopyRegion.h" @@ -21,7 +22,31 @@ struct Pixel u8 a; }; #pragma pack(pop) + +void CopyTextureData(const TextureConfig& src_config, const u8* src_ptr, u32 src_x, u32 src_y, + u32 width, u32 height, const TextureConfig& dst_config, u8* dst_ptr, u32 dst_x, + u32 dst_y) +{ + size_t texel_size = AbstractTexture::GetTexelSizeForFormat(src_config.format); + size_t src_stride = src_config.GetStride(); + size_t src_offset = + static_cast(src_y) * src_stride 
+ static_cast(src_x) * texel_size; + size_t dst_stride = dst_config.GetStride(); + size_t dst_offset = + static_cast(dst_y) * dst_stride + static_cast(dst_x) * texel_size; + size_t copy_len = static_cast(width) * texel_size; + + src_ptr += src_offset; + dst_ptr += dst_offset; + for (u32 i = 0; i < height; i++) + { + std::memcpy(dst_ptr, src_ptr, copy_len); + src_ptr += src_stride; + dst_ptr += dst_stride; + } } +} + SWTexture::SWTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config) { m_data.resize(tex_config.width * tex_config.height * 4); @@ -78,4 +103,49 @@ std::optional SWTexture::MapFullImpl() m_config.height}; } +SWStagingTexture::SWStagingTexture(StagingTextureType type, const TextureConfig& config) + : AbstractStagingTexture(type, config) +{ + m_data.resize(m_texel_size * config.width * config.height); + m_map_pointer = reinterpret_cast(m_data.data()); + m_map_stride = m_texel_size * config.width; +} + +SWStagingTexture::~SWStagingTexture() = default; + +void SWStagingTexture::CopyFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect) +{ + _assert_(src_level == 0 && src_layer == 0); + CopyTextureData(src->GetConfig(), static_cast(src)->GetData(), src_rect.left, + src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), m_config, m_data.data(), + dst_rect.left, dst_rect.top); + m_needs_flush = true; +} + +void SWStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) +{ + _assert_(dst_level == 0 && dst_layer == 0); + CopyTextureData(m_config, m_data.data(), src_rect.left, src_rect.top, src_rect.GetWidth(), + src_rect.GetHeight(), dst->GetConfig(), static_cast(dst)->GetData(), + dst_rect.left, dst_rect.top); + m_needs_flush = true; +} + +bool SWStagingTexture::Map() +{ + return true; +} + +void SWStagingTexture::Unmap() +{ +} + +void 
SWStagingTexture::Flush() +{ + m_needs_flush = false; +} } // namespace SW diff --git a/Source/Core/VideoBackends/Software/SWTexture.h b/Source/Core/VideoBackends/Software/SWTexture.h index fa7fea5308..2dbec7d0ab 100644 --- a/Source/Core/VideoBackends/Software/SWTexture.h +++ b/Source/Core/VideoBackends/Software/SWTexture.h @@ -8,6 +8,7 @@ #include "Common/CommonTypes.h" +#include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" namespace SW @@ -35,4 +36,25 @@ private: std::vector m_data; }; +class SWStagingTexture final : public AbstractStagingTexture +{ +public: + explicit SWStagingTexture(StagingTextureType type, const TextureConfig& config); + ~SWStagingTexture(); + + void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) override; + void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) override; + + bool Map() override; + void Unmap() override; + void Flush() override; + +private: + std::vector m_data; +}; + } // namespace SW diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp index 51706fc33f..5d70d38584 100644 --- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp @@ -371,6 +371,12 @@ void CommandBufferManager::OnCommandBufferExecuted(size_t index) FrameResources& resources = m_frame_resources[index]; // Fire fence tracking callbacks. 
+ for (auto iter = m_fence_point_callbacks.begin(); iter != m_fence_point_callbacks.end();) + { + auto backup_iter = iter++; + backup_iter->second.second(resources.fence); + } + for (const auto& iter : m_fence_point_callbacks) iter.second.second(resources.fence); diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index 0bb1640f47..af19ad4043 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -164,6 +164,12 @@ std::unique_ptr Renderer::CreateTexture(const TextureConfig& co return VKTexture::Create(config); } +std::unique_ptr Renderer::CreateStagingTexture(StagingTextureType type, + const TextureConfig& config) +{ + return VKStagingTexture::Create(type, config); +} + void Renderer::RenderText(const std::string& text, int left, int top, u32 color) { u32 backbuffer_width = m_swap_chain->GetWidth(); diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h index 250e76634d..b3f8168347 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.h +++ b/Source/Core/VideoBackends/Vulkan/Renderer.h @@ -33,6 +33,8 @@ public: static Renderer* GetInstance(); std::unique_ptr CreateTexture(const TextureConfig& config) override; + std::unique_ptr + CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; SwapChain* GetSwapChain() const { return m_swap_chain.get(); } BoundingBox* GetBoundingBox() const { return m_bounding_box.get(); } diff --git a/Source/Core/VideoBackends/Vulkan/StagingBuffer.h b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h index 65cfb1c5c7..2ecb21cb22 100644 --- a/Source/Core/VideoBackends/Vulkan/StagingBuffer.h +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h @@ -59,11 +59,11 @@ public: static std::unique_ptr Create(STAGING_BUFFER_TYPE type, VkDeviceSize size, VkBufferUsageFlags usage); -protected: // Allocates the resources needed to create a staging buffer. 
static bool AllocateBuffer(STAGING_BUFFER_TYPE type, VkDeviceSize size, VkBufferUsageFlags usage, VkBuffer* out_buffer, VkDeviceMemory* out_memory, bool* out_coherent); +protected: STAGING_BUFFER_TYPE m_type; VkBuffer m_buffer; VkDeviceMemory m_memory; diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp index cbb6e0c063..cfea34ff8e 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp @@ -291,7 +291,7 @@ void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* u32 upload_alignment = static_cast(g_vulkan_context->GetBufferImageGranularity()); u32 block_size = Util::GetBlockSize(m_texture->GetFormat()); u32 num_rows = Common::AlignUp(height, block_size) / block_size; - size_t source_pitch = CalculateHostTextureLevelPitch(m_config.format, row_length); + size_t source_pitch = CalculateStrideForFormat(m_config.format, row_length); size_t upload_size = source_pitch * num_rows; std::unique_ptr temp_buffer; VkBuffer upload_buffer; @@ -356,4 +356,225 @@ void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* } } +VKStagingTexture::VKStagingTexture(StagingTextureType type, const TextureConfig& config, + std::unique_ptr buffer) + : AbstractStagingTexture(type, config), m_staging_buffer(std::move(buffer)) +{ +} + +VKStagingTexture::~VKStagingTexture() +{ + if (m_needs_flush) + VKStagingTexture::Flush(); +} + +std::unique_ptr VKStagingTexture::Create(StagingTextureType type, + const TextureConfig& config) +{ + size_t stride = config.GetStride(); + size_t buffer_size = stride * static_cast(config.height); + + STAGING_BUFFER_TYPE buffer_type; + // Buffer (not image) usage flags: this value is passed to StagingBuffer::AllocateBuffer. + VkBufferUsageFlags buffer_usage; + if (type == StagingTextureType::Readback) + { + buffer_type = STAGING_BUFFER_TYPE_READBACK; + buffer_usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + } + else if (type == StagingTextureType::Upload) + { + buffer_type = STAGING_BUFFER_TYPE_UPLOAD; +
buffer_usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + } + else + { + buffer_type = STAGING_BUFFER_TYPE_READBACK; + buffer_usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + } + + VkBuffer buffer; + VkDeviceMemory memory; + bool coherent; + if (!StagingBuffer::AllocateBuffer(buffer_type, buffer_size, buffer_usage, &buffer, &memory, + &coherent)) + { + return nullptr; + } + + std::unique_ptr staging_buffer = + std::make_unique(buffer_type, buffer, memory, buffer_size, coherent); + std::unique_ptr staging_tex = std::unique_ptr( + new VKStagingTexture(type, config, std::move(staging_buffer))); + + // Use persistent mapping. + if (!staging_tex->m_staging_buffer->Map()) + return nullptr; + staging_tex->m_map_pointer = staging_tex->m_staging_buffer->GetMapPointer(); + staging_tex->m_map_stride = stride; + return staging_tex; +} + +void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect) +{ + _assert_(m_type == StagingTextureType::Readback); + _assert_(src_rect.GetWidth() == dst_rect.GetWidth() && + src_rect.GetHeight() == dst_rect.GetHeight()); + _assert_(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetConfig().height); + _assert_(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); + + Texture2D* src_tex = static_cast(src)->GetRawTexIdentifier(); + CopyFromTexture(src_tex, src_rect, src_layer, src_level, dst_rect); +} + +void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) +{ + if (m_needs_flush) + { + // Drop copy before reusing it.
+ g_command_buffer_mgr->RemoveFencePointCallback(this); + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + } + + VkImageLayout old_layout = src->GetLayout(); + src->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + // Issue the image->buffer copy, but delay it for now. + VkBufferImageCopy image_copy = {}; + VkImageAspectFlags aspect = + Util::IsDepthFormat(src->GetFormat()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + image_copy.bufferOffset = + static_cast(static_cast(dst_rect.top) * m_config.GetStride() + + static_cast(dst_rect.left) * m_texel_size); + image_copy.bufferRowLength = static_cast(m_config.width); + image_copy.bufferImageHeight = 0; + image_copy.imageSubresource = {aspect, src_level, src_layer, 1}; + image_copy.imageOffset = {src_rect.left, src_rect.top, 0}; + image_copy.imageExtent = {static_cast(src_rect.GetWidth()), + static_cast(src_rect.GetHeight()), 1u}; + vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_staging_buffer->GetBuffer(), 1, + &image_copy); + + // Restore old source texture layout. 
+ src->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); + + m_needs_flush = true; + g_command_buffer_mgr->AddFencePointCallback(this, + [this](VkCommandBuffer buf, VkFence fence) { + _assert_(m_needs_flush); + m_flush_fence = fence; + }, + [this](VkFence fence) { + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + g_command_buffer_mgr->RemoveFencePointCallback( + this); + }); +} + +void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) +{ + _assert_(m_type == StagingTextureType::Upload); + _assert_(src_rect.GetWidth() == dst_rect.GetWidth() && + src_rect.GetHeight() == dst_rect.GetHeight()); + _assert_(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= m_config.height); + _assert_(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); + + if (m_needs_flush) + { + // Drop copy before reusing it. + g_command_buffer_mgr->RemoveFencePointCallback(this); + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + } + + // Flush caches before copying. + m_staging_buffer->FlushCPUCache(); + + Texture2D* dst_tex = static_cast(dst)->GetRawTexIdentifier(); + VkImageLayout old_layout = dst_tex->GetLayout(); + dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + // Issue the buffer->image copy, but delay it for now.
+ VkBufferImageCopy image_copy = {}; + image_copy.bufferOffset = + static_cast(static_cast(src_rect.top) * m_config.GetStride() + + static_cast(src_rect.left) * m_texel_size); + image_copy.bufferRowLength = static_cast(m_config.width); + image_copy.bufferImageHeight = 0; + image_copy.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, dst_layer, 1}; + image_copy.imageOffset = {dst_rect.left, dst_rect.top, 0}; + image_copy.imageExtent = {static_cast(dst_rect.GetWidth()), + static_cast(dst_rect.GetHeight()), 1u}; + vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + m_staging_buffer->GetBuffer(), dst_tex->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + + // Restore old destination texture layout. + dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); + + m_needs_flush = true; + g_command_buffer_mgr->AddFencePointCallback(this, + [this](VkCommandBuffer buf, VkFence fence) { + _assert_(m_needs_flush); + m_flush_fence = fence; + }, + [this](VkFence fence) { + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + g_command_buffer_mgr->RemoveFencePointCallback( + this); + }); +} + +bool VKStagingTexture::Map() +{ + // Always mapped. + return true; +} + +void VKStagingTexture::Unmap() +{ + // Always mapped. +} + +void VKStagingTexture::Flush() +{ + if (!m_needs_flush) + return; + + // Unregister our callback first; the fence and flush flag are reset manually below. + g_command_buffer_mgr->RemoveFencePointCallback(this); + if (m_flush_fence != VK_NULL_HANDLE) + { + g_command_buffer_mgr->WaitForFence(m_flush_fence); + m_flush_fence = VK_NULL_HANDLE; + } + else + { + // We don't have a fence, and are pending. That means the readback is in the current + // command buffer, and must execute it to populate the staging texture. + Util::ExecuteCurrentCommandsAndRestoreState(false, true); + } + m_needs_flush = false; + + // For readback textures, invalidate the CPU cache as there is new data there.
+ if (m_type == StagingTextureType::Readback) + m_staging_buffer->InvalidateCPUCache(); +} + } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.h b/Source/Core/VideoBackends/Vulkan/VKTexture.h index 61ffae242c..5f2df0c26b 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.h +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.h @@ -7,10 +7,12 @@ #include #include +#include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" namespace Vulkan { +class StagingBuffer; class Texture2D; class VKTexture final : public AbstractTexture @@ -56,4 +58,37 @@ private: VkFramebuffer m_framebuffer; }; +class VKStagingTexture final : public AbstractStagingTexture +{ +public: + VKStagingTexture() = delete; + ~VKStagingTexture(); + + void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) override; + void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) override; + + bool Map() override; + void Unmap() override; + void Flush() override; + + // This overload is provided for compatibility as we dropped StagingTexture2D. + // For now, FramebufferManager relies on them. But we can drop it once we move that to common. 
+ void CopyFromTexture(Texture2D* src, const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect); + + static std::unique_ptr Create(StagingTextureType type, + const TextureConfig& config); + +private: + VKStagingTexture(StagingTextureType type, const TextureConfig& config, + std::unique_ptr buffer); + + std::unique_ptr m_staging_buffer; + VkFence m_flush_fence = VK_NULL_HANDLE; +}; + } // namespace Vulkan diff --git a/Source/Core/VideoCommon/AbstractStagingTexture.cpp b/Source/Core/VideoCommon/AbstractStagingTexture.cpp new file mode 100644 index 0000000000..6d4e973761 --- /dev/null +++ b/Source/Core/VideoCommon/AbstractStagingTexture.cpp @@ -0,0 +1,133 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/Assert.h" +#include "Common/MsgHandler.h" +#include "VideoCommon/AbstractStagingTexture.h" +#include "VideoCommon/AbstractTexture.h" + +AbstractStagingTexture::AbstractStagingTexture(StagingTextureType type, const TextureConfig& c) + : m_type(type), m_config(c), m_texel_size(AbstractTexture::GetTexelSizeForFormat(c.format)) +{ +} + +AbstractStagingTexture::~AbstractStagingTexture() = default; + +void AbstractStagingTexture::CopyFromTexture(const AbstractTexture* src, u32 src_layer, + u32 src_level) +{ + MathUtil::Rectangle src_rect = src->GetConfig().GetMipRect(src_level); + MathUtil::Rectangle dst_rect = m_config.GetRect(); + CopyFromTexture(src, src_rect, src_layer, src_level, dst_rect); +} + +void AbstractStagingTexture::CopyToTexture(AbstractTexture* dst, u32 dst_layer, u32 dst_level) +{ + MathUtil::Rectangle src_rect = m_config.GetRect(); + MathUtil::Rectangle dst_rect = dst->GetConfig().GetMipRect(dst_level); + CopyToTexture(src_rect, dst, dst_rect, dst_layer, dst_level); +} + +void AbstractStagingTexture::ReadTexels(const MathUtil::Rectangle& rect, void* out_ptr, + u32 out_stride) +{ + _assert_(m_type != 
StagingTextureType::Upload); + if (!PrepareForAccess()) + return; + + _assert_(rect.left >= 0 && static_cast(rect.right) <= m_config.width && rect.top >= 0 && + static_cast(rect.bottom) <= m_config.height); + + // Offset pointer to point to start of region being copied out. + const char* current_ptr = m_map_pointer; + current_ptr += rect.top * m_map_stride; + current_ptr += rect.left * m_texel_size; + + // Optimal path: same dimensions, same stride. + if (rect.left == 0 && static_cast(rect.right) == m_config.width && + m_map_stride == out_stride) + { + std::memcpy(out_ptr, current_ptr, m_map_stride * rect.GetHeight()); + return; + } + + size_t copy_size = std::min(static_cast(rect.GetWidth() * m_texel_size), m_map_stride); + int copy_height = rect.GetHeight(); + char* dst_ptr = reinterpret_cast(out_ptr); + for (int row = 0; row < copy_height; row++) + { + std::memcpy(dst_ptr, current_ptr, copy_size); + current_ptr += m_map_stride; + dst_ptr += out_stride; + } +} + +void AbstractStagingTexture::ReadTexel(u32 x, u32 y, void* out_ptr) +{ + _assert_(m_type != StagingTextureType::Upload); + if (!PrepareForAccess()) + return; + + _assert_(x < m_config.width && y < m_config.height); + const char* src_ptr = m_map_pointer + y * m_map_stride + x * m_texel_size; + std::memcpy(out_ptr, src_ptr, m_texel_size); +} + +void AbstractStagingTexture::WriteTexels(const MathUtil::Rectangle& rect, const void* in_ptr, + u32 in_stride) +{ + _assert_(m_type != StagingTextureType::Readback); + if (!PrepareForAccess()) + return; + + _assert_(rect.left >= 0 && static_cast(rect.right) <= m_config.width && rect.top >= 0 && + static_cast(rect.bottom) <= m_config.height); + + // Offset pointer to point to start of region being copied to. + char* current_ptr = m_map_pointer; + current_ptr += rect.top * m_map_stride; + current_ptr += rect.left * m_texel_size; + + // Optimal path: same dimensions, same stride. 
+ if (rect.left == 0 && static_cast(rect.right) == m_config.width && m_map_stride == in_stride) + { + std::memcpy(current_ptr, in_ptr, m_map_stride * rect.GetHeight()); + return; + } + + size_t copy_size = std::min(static_cast(rect.GetWidth() * m_texel_size), m_map_stride); + int copy_height = rect.GetHeight(); + const char* src_ptr = reinterpret_cast(in_ptr); + for (int row = 0; row < copy_height; row++) + { + std::memcpy(current_ptr, src_ptr, copy_size); + current_ptr += m_map_stride; + src_ptr += in_stride; + } +} + +void AbstractStagingTexture::WriteTexel(u32 x, u32 y, const void* in_ptr) +{ + _assert_(m_type != StagingTextureType::Readback); + if (!PrepareForAccess()) + return; + + _assert_(x < m_config.width && y < m_config.height); + char* dest_ptr = m_map_pointer + y * m_map_stride + x * m_texel_size; + std::memcpy(dest_ptr, in_ptr, m_texel_size); +} + +bool AbstractStagingTexture::PrepareForAccess() +{ + if (m_needs_flush) + { + if (IsMapped()) + Unmap(); + Flush(); + } + return IsMapped() || Map(); +} diff --git a/Source/Core/VideoCommon/AbstractStagingTexture.h b/Source/Core/VideoCommon/AbstractStagingTexture.h new file mode 100644 index 0000000000..c87dfd70b0 --- /dev/null +++ b/Source/Core/VideoCommon/AbstractStagingTexture.h @@ -0,0 +1,86 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" +#include "Common/MathUtil.h" +#include "VideoCommon/TextureConfig.h" + +class AbstractTexture; + +class AbstractStagingTexture +{ +public: + explicit AbstractStagingTexture(StagingTextureType type, const TextureConfig& c); + virtual ~AbstractStagingTexture(); + + const TextureConfig& GetConfig() const { return m_config; } + StagingTextureType GetType() const { return m_type; } + size_t GetTexelSize() const { return m_texel_size; } + bool IsMapped() const { return m_map_pointer != nullptr; } + char* GetMappedPointer() const { return m_map_pointer; } + size_t GetMappedStride() const { return m_map_stride; } + // Copies from the GPU texture object to the staging texture, which can be mapped/read by the CPU. + // Both src_rect and dst_rect must be within the bounds of the specified textures. + virtual void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) = 0; + + // Wrapper for copying a whole layer of a texture to a readback texture. + // Assumes that the level of src texture and this texture have the same dimensions. + void CopyFromTexture(const AbstractTexture* src, u32 src_layer = 0, u32 src_level = 0); + + // Copies from this staging texture to a GPU texture. + // Both src_rect and dst_rect must be within the bounds of the specified textures. + virtual void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) = 0; + + // Wrapper for copying the whole staging texture to one layer/level of a GPU texture. + // Assumes that the level of dst texture and this texture have the same dimensions. + void CopyToTexture(AbstractTexture* dst, u32 dst_layer = 0, u32 dst_level = 0); + + // Maps the texture into the CPU address space, enabling it to read the contents.
+ // The Map call may not perform synchronization. If the contents of the staging texture + // has been updated by a CopyFromTexture call, you must call Flush() first. + // If persistent mapping is supported in the backend, this may be a no-op. + virtual bool Map() = 0; + + // Unmaps the CPU-readable copy of the texture. May be a no-op on backends which + // support persistent-mapped buffers. + virtual void Unmap() = 0; + + // Flushes pending writes from the CPU to the GPU, and reads from the GPU to the CPU. + // This may cause a command buffer flush depending on if one has occurred between the last + // call to CopyFromTexture()/CopyToTexture() and the Flush() call. + virtual void Flush() = 0; + + // Reads the specified rectangle from the staging texture to out_ptr, with the specified stride + // (length in bytes of each row). CopyFromTexture must be called first. The contents of any + // texels outside of the rectangle used for CopyFromTexture is undefined. + void ReadTexels(const MathUtil::Rectangle& rect, void* out_ptr, u32 out_stride); + void ReadTexel(u32 x, u32 y, void* out_ptr); + + // Copies the texels from in_ptr to the staging texture, which can be read by the GPU, with the + // specified stride (length in bytes of each row). After updating the staging texture with all + // changes, call CopyToTexture() to update the GPU copy. 
+ void WriteTexels(const MathUtil::Rectangle& rect, const void* in_ptr, u32 in_stride); + void WriteTexel(u32 x, u32 y, const void* in_ptr); + +protected: + bool PrepareForAccess(); + + const StagingTextureType m_type; + const TextureConfig m_config; + const size_t m_texel_size; + + char* m_map_pointer = nullptr; + size_t m_map_stride = 0; + + bool m_needs_flush = false; +}; diff --git a/Source/Core/VideoCommon/AbstractTexture.cpp b/Source/Core/VideoCommon/AbstractTexture.cpp index 9fa099e602..7e09be5344 100644 --- a/Source/Core/VideoCommon/AbstractTexture.cpp +++ b/Source/Core/VideoCommon/AbstractTexture.cpp @@ -100,7 +100,7 @@ AbstractTexture::MapRegionImpl(u32 level, u32 x, u32 y, u32 width, u32 height) return {}; } -bool AbstractTexture::IsCompressedHostTextureFormat(AbstractTextureFormat format) +bool AbstractTexture::IsCompressedFormat(AbstractTextureFormat format) { switch (format) { @@ -115,7 +115,7 @@ bool AbstractTexture::IsCompressedHostTextureFormat(AbstractTextureFormat format } } -size_t AbstractTexture::CalculateHostTextureLevelPitch(AbstractTextureFormat format, u32 row_length) +size_t AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length) { switch (format) { @@ -134,6 +134,25 @@ size_t AbstractTexture::CalculateHostTextureLevelPitch(AbstractTextureFormat for } } +size_t AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::DXT1: + return 8; + case AbstractTextureFormat::DXT3: + case AbstractTextureFormat::DXT5: + case AbstractTextureFormat::BPTC: + return 16; + case AbstractTextureFormat::RGBA8: + case AbstractTextureFormat::BGRA8: + return 4; + default: + PanicAlert("Unhandled texture format."); + return 0; + } +} + const TextureConfig& AbstractTexture::GetConfig() const { return m_config; diff --git a/Source/Core/VideoCommon/AbstractTexture.h b/Source/Core/VideoCommon/AbstractTexture.h index 612ca34758..fd4b4bb621 100644 --- 
a/Source/Core/VideoCommon/AbstractTexture.h +++ b/Source/Core/VideoCommon/AbstractTexture.h @@ -39,8 +39,9 @@ public: virtual void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) = 0; - static bool IsCompressedHostTextureFormat(AbstractTextureFormat format); - static size_t CalculateHostTextureLevelPitch(AbstractTextureFormat format, u32 row_length); + static bool IsCompressedFormat(AbstractTextureFormat format); + static size_t CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length); + static size_t GetTexelSizeForFormat(AbstractTextureFormat format); const TextureConfig& GetConfig() const; diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index 4df4a01550..eff095d628 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -1,4 +1,5 @@ set(SRCS + AbstractStagingTexture.cpp AbstractTexture.cpp AsyncRequests.cpp AsyncShaderCompiler.cpp diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 7158f3f18e..b614c6a375 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -34,9 +34,11 @@ class AbstractRawTexture; class AbstractTexture; +class AbstractStagingTexture; class PostProcessingShaderImplementation; struct TextureConfig; enum class EFBAccessType; +enum class StagingTextureType; struct EfbPokeData { @@ -81,6 +83,8 @@ public: virtual void ResetAPIState() {} virtual void RestoreAPIState() {} virtual std::unique_ptr CreateTexture(const TextureConfig& config) = 0; + virtual std::unique_ptr + CreateStagingTexture(StagingTextureType type, const TextureConfig& config) = 0; // Ideal internal resolution - multiple of the native EFB resolution int GetTargetWidth() const { return m_target_width; } diff --git a/Source/Core/VideoCommon/TextureConfig.cpp b/Source/Core/VideoCommon/TextureConfig.cpp index 7a6930c798..d8155d59b1 100644 --- 
a/Source/Core/VideoCommon/TextureConfig.cpp +++ b/Source/Core/VideoCommon/TextureConfig.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "VideoCommon/TextureConfig.h" +#include "VideoCommon/AbstractTexture.h" #include @@ -12,7 +13,28 @@ bool TextureConfig::operator==(const TextureConfig& o) const std::tie(o.width, o.height, o.levels, o.layers, o.format, o.rendertarget); } +bool TextureConfig::operator!=(const TextureConfig& o) const +{ + return !operator==(o); +} + MathUtil::Rectangle TextureConfig::GetRect() const { return {0, 0, static_cast(width), static_cast(height)}; } + +MathUtil::Rectangle TextureConfig::GetMipRect(u32 level) const +{ + return {0, 0, static_cast(std::max(width >> level, 1u)), + static_cast(std::max(height >> level, 1u))}; +} + +size_t TextureConfig::GetStride() const +{ + return AbstractTexture::CalculateStrideForFormat(format, width); +} + +size_t TextureConfig::GetMipStride(u32 level) const +{ + return AbstractTexture::CalculateStrideForFormat(format, std::max(width >> level, 1u)); +} diff --git a/Source/Core/VideoCommon/TextureConfig.h b/Source/Core/VideoCommon/TextureConfig.h index ae6e54de5e..a212c7f86a 100644 --- a/Source/Core/VideoCommon/TextureConfig.h +++ b/Source/Core/VideoCommon/TextureConfig.h @@ -21,6 +21,13 @@ enum class AbstractTextureFormat : u32 Undefined }; +enum class StagingTextureType +{ + Readback, // Optimize for CPU reads, GPU writes, no CPU writes + Upload, // Optimize for CPU writes, GPU reads, no CPU reads + Mutable // Optimize for CPU reads, GPU writes, allow slow CPU writes +}; + struct TextureConfig { constexpr TextureConfig() = default; @@ -32,7 +39,11 @@ struct TextureConfig } bool operator==(const TextureConfig& o) const; + bool operator!=(const TextureConfig& o) const; MathUtil::Rectangle GetRect() const; + MathUtil::Rectangle GetMipRect(u32 level) const; + size_t GetStride() const; + size_t GetMipStride(u32 level) const; u32 width = 0; u32 height = 0; diff --git
a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index 6bdd57a9fd..83be20714f 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -36,6 +36,7 @@ + @@ -96,6 +97,7 @@ + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 815fd86bbb..e77cceec23 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -191,6 +191,9 @@ Shader Generators + + Base + @@ -362,6 +365,9 @@ Shader Generators + + Base +