dolphin/Source/Core/VideoBackends/OGL/BoundingBox.cpp
Michael Maltese 134317e07f videoconfig: add BBoxPreferStencilImplementation
@JMC47 requested this to be able to do performance comparisons.
2017-03-15 17:41:33 -07:00

176 lines
4.9 KiB
C++

// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include "Common/GL/GLUtil.h"
#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoBackends/OGL/FramebufferManager.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/VideoConfig.h"
static GLuint s_bbox_buffer_id;
static GLuint s_pbo;
static std::array<int, 4> s_stencil_bounds;
static bool s_stencil_updated;
static bool s_stencil_cleared;
static int s_target_width;
static int s_target_height;
namespace OGL
{
void BoundingBox::SetTargetSizeChanged(int target_width, int target_height)
{
if (g_ActiveConfig.BBoxUseFragmentShaderImplementation())
return;
s_target_width = target_width;
s_target_height = target_height;
s_stencil_updated = false;
glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo);
glBufferData(GL_PIXEL_PACK_BUFFER, s_target_width * s_target_height, nullptr, GL_STREAM_READ);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
void BoundingBox::Init(int target_width, int target_height)
{
if (g_ActiveConfig.BBoxUseFragmentShaderImplementation())
{
int initial_values[4] = {0, 0, 0, 0};
glGenBuffers(1, &s_bbox_buffer_id);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id);
}
else
{
s_stencil_bounds = {{0, 0, 0, 0}};
glGenBuffers(1, &s_pbo);
SetTargetSizeChanged(target_width, target_height);
}
}
void BoundingBox::Shutdown()
{
if (g_ActiveConfig.BBoxUseFragmentShaderImplementation())
{
glDeleteBuffers(1, &s_bbox_buffer_id);
}
else
{
glDeleteBuffers(1, &s_pbo);
}
}
void BoundingBox::Set(int index, int value)
{
if (g_ActiveConfig.BBoxUseFragmentShaderImplementation())
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}
else
{
s_stencil_bounds[index] = value;
if (!s_stencil_cleared)
{
// Assumes that the EFB framebuffer is currently bound
glClearStencil(0);
glClear(GL_STENCIL_BUFFER_BIT);
s_stencil_updated = false;
s_stencil_cleared = true;
}
}
}
int BoundingBox::Get(int index)
{
if (g_ActiveConfig.BBoxUseFragmentShaderImplementation())
{
int data = 0;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA))
{
// Using glMapBufferRange to read back the contents of the SSBO is extremely slow
// on nVidia drivers. This is more noticeable at higher internal resolutions.
// Using glGetBufferSubData instead does not seem to exhibit this slowdown.
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data);
}
else
{
// Using glMapBufferRange is faster on AMD cards by a measurable margin.
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int),
GL_MAP_READ_BIT);
if (ptr)
{
memcpy(&data, ptr, sizeof(int));
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
return data;
}
else
{
if (s_stencil_updated)
{
s_stencil_updated = false;
FramebufferManager::ResolveEFBStencilTexture();
glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer());
glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glReadPixels(0, 0, s_target_width, s_target_height, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer());
// Eke every bit of performance out of the compiler that we can
std::array<int, 4> bounds = s_stencil_bounds;
u8* data = static_cast<u8*>(glMapBufferRange(
GL_PIXEL_PACK_BUFFER, 0, s_target_height * s_target_width, GL_MAP_READ_BIT));
for (int row = 0; row < s_target_height; row++)
{
for (int col = 0; col < s_target_width; col++)
{
if (data[row * s_target_width + col] == 0)
continue;
bounds[0] = std::min(bounds[0], col);
bounds[1] = std::max(bounds[1], col);
bounds[2] = std::min(bounds[2], row);
bounds[3] = std::max(bounds[3], row);
}
}
s_stencil_bounds = bounds;
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
return s_stencil_bounds[index];
}
}
void BoundingBox::StencilWasUpdated()
{
s_stencil_updated = true;
s_stencil_cleared = false;
}
bool BoundingBox::NeedsStencilBuffer()
{
return g_ActiveConfig.bBBoxEnable && !g_ActiveConfig.BBoxUseFragmentShaderImplementation();
}
};