Reformat all the things. Have fun with merge conflicts.

This commit is contained in:
Pierre Bourdon
2016-06-24 10:43:46 +02:00
parent 2115e8a4a6
commit 3570c7f03a
1116 changed files with 187405 additions and 180344 deletions

View File

@ -2,8 +2,8 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/MemoryUtil.h"
#include "Common/GL/GLUtil.h"
#include "Common/MemoryUtil.h"
#include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/StreamBuffer.h"
@ -13,27 +13,26 @@
namespace OGL
{
// moved out of constructor, so m_buffer is allowed to be const
static u32 GenBuffer()
{
u32 id;
glGenBuffers(1, &id);
return id;
u32 id;
glGenBuffers(1, &id);
return id;
}
StreamBuffer::StreamBuffer(u32 type, u32 size)
: m_buffer(GenBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)), m_bit_per_slot(IntLog2(ROUND_UP_POW2(size) / SYNC_POINTS))
: m_buffer(GenBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)),
m_bit_per_slot(IntLog2(ROUND_UP_POW2(size) / SYNC_POINTS))
{
m_iterator = 0;
m_used_iterator = 0;
m_free_iterator = 0;
m_iterator = 0;
m_used_iterator = 0;
m_free_iterator = 0;
}
StreamBuffer::~StreamBuffer()
{
glDeleteBuffers(1, &m_buffer);
glDeleteBuffers(1, &m_buffer);
}
/* Shared synchronization code for ring buffers
@ -44,76 +43,79 @@ StreamBuffer::~StreamBuffer()
* To reduce overhead, the complete buffer is splitted up into SYNC_POINTS chunks.
* For each of this chunks, there is a fence which checks if this chunk is still in use.
*
* As our API allows to alloc more memory then it has to use, we have to catch how much is already written.
* As our API allows to alloc more memory then it has to use, we have to catch how much is already
* written.
*
* m_iterator - writing position
* m_free_iterator - last position checked if free
* m_used_iterator - last position known to be written
*
* So on alloc, we have to wait for all slots between m_free_iterator and m_iterator (and set m_free_iterator to m_iterator afterwards).
* So on alloc, we have to wait for all slots between m_free_iterator and m_iterator (and set
* m_free_iterator to m_iterator afterwards).
*
* We also assume that this buffer is accessed by the GPU between the Unmap and Map function,
* so we may create the fences on the start of mapping.
* Some here, new fences for the chunks between m_used_iterator and m_iterator (also update m_used_iterator).
* Some here, new fences for the chunks between m_used_iterator and m_iterator (also update
* m_used_iterator).
*
* As ring buffers have an ugly behavior on rollover, have fun to read this code ;)
*/
void StreamBuffer::CreateFences()
{
for (int i = 0; i < SYNC_POINTS; i++)
{
m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
for (int i = 0; i < SYNC_POINTS; i++)
{
m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
}
void StreamBuffer::DeleteFences()
{
for (int i = Slot(m_free_iterator) + 1; i < SYNC_POINTS; i++)
{
glDeleteSync(m_fences[i]);
}
for (int i = 0; i < Slot(m_iterator); i++)
{
glDeleteSync(m_fences[i]);
}
for (int i = Slot(m_free_iterator) + 1; i < SYNC_POINTS; i++)
{
glDeleteSync(m_fences[i]);
}
for (int i = 0; i < Slot(m_iterator); i++)
{
glDeleteSync(m_fences[i]);
}
}
void StreamBuffer::AllocMemory(u32 size)
{
// insert waiting slots for used memory
for (int i = Slot(m_used_iterator); i < Slot(m_iterator); i++)
{
m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
m_used_iterator = m_iterator;
// insert waiting slots for used memory
for (int i = Slot(m_used_iterator); i < Slot(m_iterator); i++)
{
m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
m_used_iterator = m_iterator;
// wait for new slots to end of buffer
for (int i = Slot(m_free_iterator) + 1; i <= Slot(m_iterator + size) && i < SYNC_POINTS; i++)
{
glClientWaitSync(m_fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(m_fences[i]);
}
m_free_iterator = m_iterator + size;
// wait for new slots to end of buffer
for (int i = Slot(m_free_iterator) + 1; i <= Slot(m_iterator + size) && i < SYNC_POINTS; i++)
{
glClientWaitSync(m_fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(m_fences[i]);
}
m_free_iterator = m_iterator + size;
// if buffer is full
if (m_iterator + size >= m_size)
{
// insert waiting slots in unused space at the end of the buffer
for (int i = Slot(m_used_iterator); i < SYNC_POINTS; i++)
{
m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
// if buffer is full
if (m_iterator + size >= m_size)
{
// insert waiting slots in unused space at the end of the buffer
for (int i = Slot(m_used_iterator); i < SYNC_POINTS; i++)
{
m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
// move to the start
m_used_iterator = m_iterator = 0; // offset 0 is always aligned
// move to the start
m_used_iterator = m_iterator = 0; // offset 0 is always aligned
// wait for space at the start
for (int i = 0; i <= Slot(m_iterator + size); i++)
{
glClientWaitSync(m_fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(m_fences[i]);
}
m_free_iterator = m_iterator + size;
}
// wait for space at the start
for (int i = 0; i <= Slot(m_iterator + size); i++)
{
glClientWaitSync(m_fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(m_fences[i]);
}
m_free_iterator = m_iterator + size;
}
}
/* The usual way to stream data to the GPU.
@ -126,34 +128,32 @@ void StreamBuffer::AllocMemory(u32 size)
class MapAndOrphan : public StreamBuffer
{
public:
MapAndOrphan(u32 type, u32 size) : StreamBuffer(type, size)
{
glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
}
MapAndOrphan(u32 type, u32 size) : StreamBuffer(type, size)
{
glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
}
~MapAndOrphan()
{
}
~MapAndOrphan() {}
std::pair<u8*, u32> Map(u32 size) override
{
if (m_iterator + size >= m_size)
{
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
m_iterator = 0;
}
u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
GL_MAP_UNSYNCHRONIZED_BIT);
return std::make_pair(pointer, m_iterator);
}
std::pair<u8*, u32> Map(u32 size) override
{
if (m_iterator + size >= m_size)
{
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
m_iterator = 0;
}
u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
return std::make_pair(pointer, m_iterator);
}
void Unmap(u32 used_size) override
{
glFlushMappedBufferRange(m_buffertype, 0, used_size);
glUnmapBuffer(m_buffertype);
m_iterator += used_size;
}
void Unmap(u32 used_size) override
{
glFlushMappedBufferRange(m_buffertype, 0, used_size);
glUnmapBuffer(m_buffertype);
m_iterator += used_size;
}
};
/* A modified streaming way without reallocation
@ -166,32 +166,29 @@ public:
class MapAndSync : public StreamBuffer
{
public:
MapAndSync(u32 type, u32 size) : StreamBuffer(type, size)
{
CreateFences();
glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
}
MapAndSync(u32 type, u32 size) : StreamBuffer(type, size)
{
CreateFences();
glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
}
~MapAndSync()
{
DeleteFences();
}
~MapAndSync() { DeleteFences(); }
std::pair<u8*, u32> Map(u32 size) override
{
AllocMemory(size);
u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
GL_MAP_UNSYNCHRONIZED_BIT);
return std::make_pair(pointer, m_iterator);
}
std::pair<u8*, u32> Map(u32 size) override
{
AllocMemory(size);
u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
return std::make_pair(pointer, m_iterator);
}
void Unmap(u32 used_size) override
{
glFlushMappedBufferRange(m_buffertype, 0, used_size);
glUnmapBuffer(m_buffertype);
m_iterator += used_size;
}
void Unmap(u32 used_size) override
{
glFlushMappedBufferRange(m_buffertype, 0, used_size);
glUnmapBuffer(m_buffertype);
m_iterator += used_size;
}
};
/* Streaming fifo without mapping overhead.
@ -210,42 +207,45 @@ public:
class BufferStorage : public StreamBuffer
{
public:
BufferStorage(u32 type, u32 size, bool _coherent = false) : StreamBuffer(type, size), coherent(_coherent)
{
CreateFences();
glBindBuffer(m_buffertype, m_buffer);
BufferStorage(u32 type, u32 size, bool _coherent = false)
: StreamBuffer(type, size), coherent(_coherent)
{
CreateFences();
glBindBuffer(m_buffertype, m_buffer);
// PERSISTANT_BIT to make sure that the buffer can be used while mapped
// COHERENT_BIT is set so we don't have to use a MemoryBarrier on write
// CLIENT_STORAGE_BIT is set since we access the buffer more frequently on the client side then server side
glBufferStorage(m_buffertype, m_size, nullptr,
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0));
m_pointer = (u8*)glMapBufferRange(m_buffertype, 0, m_size,
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT));
}
// PERSISTANT_BIT to make sure that the buffer can be used while mapped
// COHERENT_BIT is set so we don't have to use a MemoryBarrier on write
// CLIENT_STORAGE_BIT is set since we access the buffer more frequently on the client side then
// server side
glBufferStorage(m_buffertype, m_size, nullptr, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT |
(coherent ? GL_MAP_COHERENT_BIT : 0));
m_pointer = (u8*)glMapBufferRange(
m_buffertype, 0, m_size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT |
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT));
}
~BufferStorage()
{
DeleteFences();
glUnmapBuffer(m_buffertype);
glBindBuffer(m_buffertype, 0);
}
~BufferStorage()
{
DeleteFences();
glUnmapBuffer(m_buffertype);
glBindBuffer(m_buffertype, 0);
}
std::pair<u8*, u32> Map(u32 size) override
{
AllocMemory(size);
return std::make_pair(m_pointer + m_iterator, m_iterator);
}
std::pair<u8*, u32> Map(u32 size) override
{
AllocMemory(size);
return std::make_pair(m_pointer + m_iterator, m_iterator);
}
void Unmap(u32 used_size) override
{
if (!coherent)
glFlushMappedBufferRange(m_buffertype, m_iterator, used_size);
m_iterator += used_size;
}
void Unmap(u32 used_size) override
{
if (!coherent)
glFlushMappedBufferRange(m_buffertype, m_iterator, used_size);
m_iterator += used_size;
}
u8* m_pointer;
const bool coherent;
u8* m_pointer;
const bool coherent;
};
/* --- AMD only ---
@ -258,38 +258,36 @@ public:
class PinnedMemory : public StreamBuffer
{
public:
PinnedMemory(u32 type, u32 size) : StreamBuffer(type, size)
{
CreateFences();
m_pointer = (u8*)AllocateAlignedMemory(ROUND_UP(m_size,ALIGN_PINNED_MEMORY), ALIGN_PINNED_MEMORY );
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_buffer);
glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, ROUND_UP(m_size,ALIGN_PINNED_MEMORY), m_pointer, GL_STREAM_COPY);
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
glBindBuffer(m_buffertype, m_buffer);
}
PinnedMemory(u32 type, u32 size) : StreamBuffer(type, size)
{
CreateFences();
m_pointer =
(u8*)AllocateAlignedMemory(ROUND_UP(m_size, ALIGN_PINNED_MEMORY), ALIGN_PINNED_MEMORY);
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_buffer);
glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, ROUND_UP(m_size, ALIGN_PINNED_MEMORY),
m_pointer, GL_STREAM_COPY);
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
glBindBuffer(m_buffertype, m_buffer);
}
~PinnedMemory()
{
DeleteFences();
glBindBuffer(m_buffertype, 0);
glFinish(); // ogl pipeline must be flushed, else this buffer can be in use
FreeAlignedMemory(m_pointer);
m_pointer = nullptr;
}
~PinnedMemory()
{
DeleteFences();
glBindBuffer(m_buffertype, 0);
glFinish(); // ogl pipeline must be flushed, else this buffer can be in use
FreeAlignedMemory(m_pointer);
m_pointer = nullptr;
}
std::pair<u8*, u32> Map(u32 size) override
{
AllocMemory(size);
return std::make_pair(m_pointer + m_iterator, m_iterator);
}
std::pair<u8*, u32> Map(u32 size) override
{
AllocMemory(size);
return std::make_pair(m_pointer + m_iterator, m_iterator);
}
void Unmap(u32 used_size) override
{
m_iterator += used_size;
}
u8* m_pointer;
static const u32 ALIGN_PINNED_MEMORY = 4096;
void Unmap(u32 used_size) override { m_iterator += used_size; }
u8* m_pointer;
static const u32 ALIGN_PINNED_MEMORY = 4096;
};
/* Fifo based on the glBufferSubData call.
@ -300,29 +298,17 @@ public:
class BufferSubData : public StreamBuffer
{
public:
BufferSubData(u32 type, u32 size) : StreamBuffer(type, size)
{
glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, size, nullptr, GL_STATIC_DRAW);
m_pointer = new u8[m_size];
}
BufferSubData(u32 type, u32 size) : StreamBuffer(type, size)
{
glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, size, nullptr, GL_STATIC_DRAW);
m_pointer = new u8[m_size];
}
~BufferSubData()
{
delete [] m_pointer;
}
std::pair<u8*, u32> Map(u32 size) override
{
return std::make_pair(m_pointer, 0);
}
void Unmap(u32 used_size) override
{
glBufferSubData(m_buffertype, 0, used_size, m_pointer);
}
u8* m_pointer;
~BufferSubData() { delete[] m_pointer; }
std::pair<u8*, u32> Map(u32 size) override { return std::make_pair(m_pointer, 0); }
void Unmap(u32 used_size) override { glBufferSubData(m_buffertype, 0, used_size, m_pointer); }
u8* m_pointer;
};
/* Fifo based on the glBufferData call.
@ -333,69 +319,63 @@ public:
class BufferData : public StreamBuffer
{
public:
BufferData(u32 type, u32 size) : StreamBuffer(type, size)
{
glBindBuffer(m_buffertype, m_buffer);
m_pointer = new u8[m_size];
}
BufferData(u32 type, u32 size) : StreamBuffer(type, size)
{
glBindBuffer(m_buffertype, m_buffer);
m_pointer = new u8[m_size];
}
~BufferData()
{
delete [] m_pointer;
}
~BufferData() { delete[] m_pointer; }
std::pair<u8*, u32> Map(u32 size) override { return std::make_pair(m_pointer, 0); }
void Unmap(u32 used_size) override
{
glBufferData(m_buffertype, used_size, m_pointer, GL_STREAM_DRAW);
}
std::pair<u8*, u32> Map(u32 size) override
{
return std::make_pair(m_pointer, 0);
}
void Unmap(u32 used_size) override
{
glBufferData(m_buffertype, used_size, m_pointer, GL_STREAM_DRAW);
}
u8* m_pointer;
u8* m_pointer;
};
// Chooses the best streaming method based on the supported extensions and known issues
std::unique_ptr<StreamBuffer> StreamBuffer::Create(u32 type, u32 size)
{
// without basevertex support, only streaming methods whith uploads everything to zero works fine:
if (!g_ogl_config.bSupportsGLBaseVertex)
{
if (!DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTREAM))
return std::make_unique<BufferSubData>(type, size);
// without basevertex support, only streaming methods whith uploads everything to zero works fine:
if (!g_ogl_config.bSupportsGLBaseVertex)
{
if (!DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTREAM))
return std::make_unique<BufferSubData>(type, size);
// BufferData is by far the worst way, only use it if needed
return std::make_unique<BufferData>(type, size);
}
// BufferData is by far the worst way, only use it if needed
return std::make_unique<BufferData>(type, size);
}
// Prefer the syncing buffers over the orphaning one
if (g_ogl_config.bSupportsGLSync)
{
// pinned memory is much faster than buffer storage on AMD cards
if (g_ogl_config.bSupportsGLPinnedMemory &&
!(DriverDetails::HasBug(DriverDetails::BUG_BROKENPINNEDMEMORY) && type == GL_ELEMENT_ARRAY_BUFFER))
return std::make_unique<PinnedMemory>(type, size);
// Prefer the syncing buffers over the orphaning one
if (g_ogl_config.bSupportsGLSync)
{
// pinned memory is much faster than buffer storage on AMD cards
if (g_ogl_config.bSupportsGLPinnedMemory &&
!(DriverDetails::HasBug(DriverDetails::BUG_BROKENPINNEDMEMORY) &&
type == GL_ELEMENT_ARRAY_BUFFER))
return std::make_unique<PinnedMemory>(type, size);
// buffer storage works well in most situations
bool coherent = DriverDetails::HasBug(DriverDetails::BUG_BROKENEXPLICITFLUSH);
if (g_ogl_config.bSupportsGLBufferStorage &&
!(DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTORAGE) && type == GL_ARRAY_BUFFER) &&
!(DriverDetails::HasBug(DriverDetails::BUG_INTELBROKENBUFFERSTORAGE) && type == GL_ELEMENT_ARRAY_BUFFER))
return std::make_unique<BufferStorage>(type, size, coherent);
// buffer storage works well in most situations
bool coherent = DriverDetails::HasBug(DriverDetails::BUG_BROKENEXPLICITFLUSH);
if (g_ogl_config.bSupportsGLBufferStorage &&
!(DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTORAGE) &&
type == GL_ARRAY_BUFFER) &&
!(DriverDetails::HasBug(DriverDetails::BUG_INTELBROKENBUFFERSTORAGE) &&
type == GL_ELEMENT_ARRAY_BUFFER))
return std::make_unique<BufferStorage>(type, size, coherent);
// don't fall back to MapAnd* for Nvidia drivers
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENUNSYNCMAPPING))
return std::make_unique<BufferSubData>(type, size);
// don't fall back to MapAnd* for Nvidia drivers
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENUNSYNCMAPPING))
return std::make_unique<BufferSubData>(type, size);
// mapping fallback
if (g_ogl_config.bSupportsGLSync)
return std::make_unique<MapAndSync>(type, size);
}
// mapping fallback
if (g_ogl_config.bSupportsGLSync)
return std::make_unique<MapAndSync>(type, size);
}
// default fallback, should work everywhere, but isn't the best way to do this job
return std::make_unique<MapAndOrphan>(type, size);
// default fallback, should work everywhere, but isn't the best way to do this job
return std::make_unique<MapAndOrphan>(type, size);
}
}