Use main buffers for utility draws

This commit is contained in:
Stenzek
2018-11-27 17:16:53 +10:00
parent 5ca18ff04e
commit 7afd5cc2fb
32 changed files with 533 additions and 681 deletions

View File

@ -259,6 +259,21 @@ void ProgramShaderCache::UploadConstants()
}
}
void ProgramShaderCache::UploadConstants(const void* data, u32 data_size)
{
// allocate and copy
const u32 alloc_size = Common::AlignUp(data_size, s_ubo_align);
auto buffer = s_buffer->Map(alloc_size, s_ubo_align);
std::memcpy(buffer.first, data, data_size);
s_buffer->Unmap(alloc_size);
// bind the same sub-buffer to all stages
for (u32 index = 1; index <= 3; index++)
glBindBufferRange(GL_UNIFORM_BUFFER, index, s_buffer->m_buffer, buffer.second, data_size);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size);
}
bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
const std::string& pcode, const std::string& gcode)
{
@ -539,6 +554,11 @@ void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format)
s_last_VAO = new_VAO;
}
// Reports whether the most recently bound VAO is a real vertex format, i.e. it is neither
// the "nothing bound" value (0) nor the attributeless VAO.
bool ProgramShaderCache::IsValidVertexFormatBound()
{
  if (s_last_VAO == 0)
    return false;

  return s_last_VAO != s_attributeless_VAO;
}
void ProgramShaderCache::InvalidateVertexFormat()
{
s_last_VAO = 0;

View File

@ -69,6 +69,7 @@ class ProgramShaderCache
{
public:
static void BindVertexFormat(const GLVertexFormat* vertex_format);
static bool IsValidVertexFormatBound();
static void InvalidateVertexFormat();
static void InvalidateLastProgram();
@ -83,6 +84,7 @@ public:
static u32 GetUniformBufferAlignment();
static void InvalidateConstants();
static void UploadConstants();
static void UploadConstants(const void* data, u32 data_size);
static void Init();
static void Shutdown();

View File

@ -1184,6 +1184,27 @@ void Renderer::SetViewport(float x, float y, float width, float height, float ne
glDepthRangef(near_depth, far_depth);
}
// Issues a non-indexed draw of num_vertices vertices starting at base_vertex, using the
// primitive type of the currently set graphics pipeline.
void Renderer::Draw(u32 base_vertex, u32 num_vertices)
{
  const auto* const gl_pipeline = static_cast<const OGLPipeline*>(m_graphics_pipeline);
  glDrawArrays(gl_pipeline->GetGLPrimitive(), base_vertex, num_vertices);
}
// Issues an indexed draw of num_indices 16-bit indices, using the primitive type of the
// currently set graphics pipeline.
//
// base_index:  index (in u16 elements, not bytes) of the first index to read.
// num_indices: number of indices to draw.
// base_vertex: value added to each index; only honoured when the driver supports base-vertex
//              draws, and not passed to GL on the fallback path.
void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
{
  const auto primitive = static_cast<const OGLPipeline*>(m_graphics_pipeline)->GetGLPrimitive();

  // GL wants the start position as a byte offset disguised as a pointer; u16 pointer
  // arithmetic performs the element-to-byte scaling.
  u16* const index_offset = static_cast<u16*>(nullptr) + base_index;

  if (!g_ogl_config.bSupportsGLBaseVertex)
  {
    glDrawElements(primitive, num_indices, GL_UNSIGNED_SHORT, index_offset);
    return;
  }

  glDrawElementsBaseVertex(primitive, num_indices, GL_UNSIGNED_SHORT, index_offset, base_vertex);
}
void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z)
{
@ -1675,54 +1696,6 @@ void Renderer::SetInterlacingMode()
// TODO
}
void Renderer::DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices,
u32 vertex_stride, u32 num_vertices)
{
// Copy in uniforms.
if (uniforms_size > 0)
UploadUtilityUniforms(uniforms, uniforms_size);
// Draw from base index if there is vertex data.
if (vertices)
{
StreamBuffer* vbuf = static_cast<VertexManager*>(g_vertex_manager.get())->GetVertexBuffer();
auto buf = vbuf->Map(vertex_stride * num_vertices, vertex_stride);
std::memcpy(buf.first, vertices, vertex_stride * num_vertices);
vbuf->Unmap(vertex_stride * num_vertices);
glDrawArrays(m_graphics_pipeline->GetGLPrimitive(), buf.second / vertex_stride, num_vertices);
}
else
{
glDrawArrays(m_graphics_pipeline->GetGLPrimitive(), 0, num_vertices);
}
}
// Copies uniforms_size bytes of uniform data into the shared uniform stream buffer and binds
// the uploaded range to uniform buffer binding point 1. uniforms_size must be non-zero.
void Renderer::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
  DEBUG_ASSERT(uniforms_size > 0);

  auto* const uniform_stream = ProgramShaderCache::GetUniformBuffer();
  const auto mapping =
      uniform_stream->Map(uniforms_size, ProgramShaderCache::GetUniformBufferAlignment());
  std::memcpy(mapping.first, uniforms, uniforms_size);
  uniform_stream->Unmap(uniforms_size);

  glBindBufferRange(GL_UNIFORM_BUFFER, 1, uniform_stream->m_buffer, mapping.second,
                    uniforms_size);

  // Not pretty: given how the UBOs are bound, invalidating the constants is what forces the
  // regular constants to be rebound afterwards.
  ProgramShaderCache::InvalidateConstants();
}
void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* uniforms,
u32 uniforms_size, u32 groups_x, u32 groups_y, u32 groups_z)
{
glUseProgram(static_cast<const OGLShader*>(shader)->GetGLComputeProgramID());
if (uniforms_size > 0)
UploadUtilityUniforms(uniforms, uniforms_size);
glDispatchCompute(groups_x, groups_y, groups_z);
ProgramShaderCache::InvalidateLastProgram();
}
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
return std::make_unique<SharedContextAsyncShaderCompiler>();

View File

@ -116,6 +116,8 @@ public:
void SetInterlacingMode() override;
void SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void RenderText(const std::string& text, int left, int top, u32 color) override;
@ -137,18 +139,14 @@ public:
void ReinterpretPixelData(unsigned int convtype) override;
void DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices,
u32 vertex_stride, u32 num_vertices) override;
void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size,
u32 groups_x, u32 groups_y, u32 groups_z) override;
std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler() override;
// Only call methods from this on the GPU thread.
GLContext* GetMainGLContext() const { return m_main_gl_context.get(); }
bool IsGLES() const { return m_main_gl_context->IsGLES(); }
const OGLPipeline* GetCurrentGraphicsPipeline() const { return m_graphics_pipeline; }
private:
void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc,
const TargetRectangle& targetPixelRc, const void* data);
@ -165,7 +163,6 @@ private:
void ApplyBlendingState(const BlendingState state, bool force = false);
void ApplyRasterizationState(const RasterizationState state, bool force = false);
void ApplyDepthState(const DepthState state, bool force = false);
void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size);
std::unique_ptr<GLContext> m_main_gl_context;
std::array<const AbstractTexture*, 8> m_bound_textures{};

View File

@ -19,6 +19,8 @@ public:
static std::unique_ptr<StreamBuffer> Create(u32 type, u32 size);
virtual ~StreamBuffer();
u32 GetCurrentOffset() const { return m_iterator; }
/* This mapping function will return a pair of:
* - the pointer to the mapped buffer
* - the offset into the real GPU buffer (always multiple of stride)

View File

@ -15,6 +15,7 @@
#include "Common/StringUtil.h"
#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoBackends/OGL/OGLPipeline.h"
#include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/StreamBuffer.h"
@ -31,11 +32,6 @@ namespace OGL
const u32 MAX_IBUFFER_SIZE = 2 * 1024 * 1024;
const u32 MAX_VBUFFER_SIZE = 32 * 1024 * 1024;
static std::unique_ptr<StreamBuffer> s_vertexBuffer;
static std::unique_ptr<StreamBuffer> s_indexBuffer;
static size_t s_baseVertex;
static size_t s_index_offset;
VertexManager::VertexManager() : m_cpu_v_buffer(MAX_VBUFFER_SIZE), m_cpu_i_buffer(MAX_IBUFFER_SIZE)
{
CreateDeviceObjects();
@ -48,58 +44,45 @@ VertexManager::~VertexManager()
void VertexManager::CreateDeviceObjects()
{
s_vertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE);
m_vertex_buffers = s_vertexBuffer->m_buffer;
s_indexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE);
m_index_buffers = s_indexBuffer->m_buffer;
m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE);
m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE);
}
void VertexManager::DestroyDeviceObjects()
{
s_vertexBuffer.reset();
s_indexBuffer.reset();
m_vertex_buffer.reset();
m_index_buffer.reset();
}
StreamBuffer* VertexManager::GetVertexBuffer() const
void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
return s_vertexBuffer.get();
}
OGL::StreamBuffer* VertexManager::GetIndexBuffer() const
{
return s_indexBuffer.get();
ProgramShaderCache::InvalidateConstants();
ProgramShaderCache::UploadConstants(uniforms, uniforms_size);
}
GLuint VertexManager::GetVertexBufferHandle() const
{
return m_vertex_buffers;
return m_vertex_buffer->m_buffer;
}
GLuint VertexManager::GetIndexBufferHandle() const
{
return m_index_buffers;
return m_index_buffer->m_buffer;
}
void VertexManager::PrepareDrawBuffers(u32 stride)
static void CheckBufferBinding()
{
u32 vertex_data_size = IndexGenerator::GetNumVerts() * stride;
u32 index_data_size = IndexGenerator::GetIndexLen() * sizeof(u16);
// The index buffer is part of the VAO state, therefore we need to bind it first.
const GLVertexFormat* vertex_format =
static_cast<GLVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat());
ProgramShaderCache::BindVertexFormat(vertex_format);
s_vertexBuffer->Unmap(vertex_data_size);
s_indexBuffer->Unmap(index_data_size);
ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_data_size);
ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size);
if (!ProgramShaderCache::IsValidVertexFormatBound())
{
ProgramShaderCache::BindVertexFormat(
static_cast<GLVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat()));
}
}
void VertexManager::ResetBuffer(u32 stride)
void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all)
{
if (m_cull_all)
if (cull_all)
{
// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_v_buffer.data();
@ -109,68 +92,41 @@ void VertexManager::ResetBuffer(u32 stride)
}
else
{
// The index buffer is part of the VAO state, therefore we need to bind it first.
const GLVertexFormat* vertex_format =
static_cast<GLVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat());
ProgramShaderCache::BindVertexFormat(vertex_format);
CheckBufferBinding();
auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride);
m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first;
m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE;
s_baseVertex = buffer.second / stride;
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16));
IndexGenerator::Start((u16*)buffer.first);
s_index_offset = buffer.second;
}
}
void VertexManager::Draw(u32 stride)
void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
u32* out_base_vertex, u32* out_base_index)
{
u32 index_size = IndexGenerator::GetIndexLen();
u32 max_index = IndexGenerator::GetNumVerts();
GLenum primitive_mode = 0;
u32 vertex_data_size = num_vertices * vertex_stride;
u32 index_data_size = num_indices * sizeof(u16);
switch (m_current_primitive_type)
{
case PrimitiveType::Points:
primitive_mode = GL_POINTS;
break;
case PrimitiveType::Lines:
primitive_mode = GL_LINES;
break;
case PrimitiveType::Triangles:
primitive_mode = GL_TRIANGLES;
break;
case PrimitiveType::TriangleStrip:
primitive_mode = GL_TRIANGLE_STRIP;
break;
}
*out_base_vertex = vertex_stride > 0 ? (m_vertex_buffer->GetCurrentOffset() / vertex_stride) : 0;
*out_base_index = m_index_buffer->GetCurrentOffset() / sizeof(u16);
if (g_ogl_config.bSupportsGLBaseVertex)
{
glDrawRangeElementsBaseVertex(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT,
(u8*)nullptr + s_index_offset, (GLint)s_baseVertex);
}
else
{
glDrawRangeElements(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT,
(u8*)nullptr + s_index_offset);
}
CheckBufferBinding();
m_vertex_buffer->Unmap(vertex_data_size);
m_index_buffer->Unmap(index_data_size);
INCSTAT(stats.thisFrame.numDrawCalls);
ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_data_size);
ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size);
}
void VertexManager::vFlush()
void VertexManager::UploadConstants()
{
GLVertexFormat* nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
u32 stride = nativeVertexFmt->GetVertexStride();
PrepareDrawBuffers(stride);
// upload global constants
ProgramShaderCache::UploadConstants();
}
void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex)
{
if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation())
{
glEnable(GL_STENCIL_TEST);
@ -178,8 +134,8 @@ void VertexManager::vFlush()
if (m_current_pipeline_object)
{
g_renderer->SetPipeline(m_current_pipeline_object);
Draw(stride);
static_cast<Renderer*>(g_renderer.get())->SetPipeline(m_current_pipeline_object);
static_cast<Renderer*>(g_renderer.get())->DrawIndexed(base_index, num_indices, base_vertex);
}
if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation())
@ -191,5 +147,4 @@ void VertexManager::vFlush()
g_Config.iSaveTargetId++;
ClearEFBCache();
}
} // namespace
} // namespace OGL

View File

@ -35,27 +35,26 @@ public:
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
void CreateDeviceObjects() override;
void DestroyDeviceObjects() override;
void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override;
StreamBuffer* GetVertexBuffer() const;
StreamBuffer* GetIndexBuffer() const;
GLuint GetVertexBufferHandle() const;
GLuint GetIndexBufferHandle() const;
protected:
void ResetBuffer(u32 stride) override;
void CreateDeviceObjects() override;
void DestroyDeviceObjects() override;
void ResetBuffer(u32 vertex_stride, bool cull_all) override;
void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex,
u32* out_base_index) override;
void UploadConstants() override;
void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override;
private:
void Draw(u32 stride);
void vFlush() override;
void PrepareDrawBuffers(u32 stride);
GLuint m_vertex_buffers;
GLuint m_index_buffers;
std::unique_ptr<StreamBuffer> m_vertex_buffer;
std::unique_ptr<StreamBuffer> m_index_buffer;
// Alternative buffers in CPU memory for primitives we are going to discard.
std::vector<u8> m_cpu_v_buffer;
std::vector<u16> m_cpu_i_buffer;
};
}
} // namespace OGL