dolphin/Source/Core/VideoBackends/OGL/PerfQuery.cpp

270 lines
6.1 KiB
C++
Raw Normal View History

// Copyright 2012 Dolphin Emulator Project
2015-05-17 17:08:10 -06:00
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <memory>
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/GL/GLInterfaceBase.h"
#include "Common/GL/GLUtil.h"
#include "VideoBackends/OGL/PerfQuery.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/VideoConfig.h"
namespace OGL
{
std::unique_ptr<PerfQueryBase> GetPerfQuery()
{
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3 &&
GLExtensions::Supports("GL_NV_occlusion_query_samples"))
return std::make_unique<PerfQueryGLESNV>();
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
return std::make_unique<PerfQueryGL>(GL_ANY_SAMPLES_PASSED);
return std::make_unique<PerfQueryGL>(GL_SAMPLES_PASSED);
}
PerfQuery::PerfQuery() : m_query_read_pos()
{
ResetQuery();
}
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
m_query->EnableQuery(type);
}
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
m_query->DisableQuery(type);
}
bool PerfQuery::IsFlushed() const
{
return 0 == m_query_count;
}
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
m_query->FlushResults();
}
void PerfQuery::ResetQuery()
{
m_query_count = 0;
std::fill_n(m_results, ArraySize(m_results), 0);
}
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
u32 result = 0;
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
{
result = m_results[PQG_ZCOMP_ZCOMPLOC];
}
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
{
result = m_results[PQG_ZCOMP];
}
else if (type == PQ_BLEND_INPUT)
{
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
}
else if (type == PQ_EFB_COPY_CLOCKS)
{
result = m_results[PQG_EFB_COPY_CLOCKS];
}
return result;
}
// Implementations
PerfQueryGL::PerfQueryGL(GLenum query_type) : m_query_type(query_type)
{
for (ActiveQuery& query : m_query_buffer)
glGenQueries(1, &query.query_id);
}
PerfQueryGL::~PerfQueryGL()
{
for (ActiveQuery& query : m_query_buffer)
glDeleteQueries(1, &query.query_id);
}
void PerfQueryGL::EnableQuery(PerfQueryGroup type)
{
// Is this sane?
if (m_query_count > m_query_buffer.size() / 2)
WeakFlush();
if (m_query_buffer.size() == m_query_count)
{
FlushOne();
// ERROR_LOG(VIDEO, "Flushed query buffer early!");
}
// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
glBeginQuery(m_query_type, entry.query_id);
entry.query_type = type;
++m_query_count;
}
}
void PerfQueryGL::DisableQuery(PerfQueryGroup type)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
glEndQuery(m_query_type);
}
}
void PerfQueryGL::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = GL_FALSE;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result);
if (GL_TRUE == result)
{
FlushOne();
}
else
{
break;
}
}
}
void PerfQueryGL::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = 0;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result);
// NOTE: Reported pixel metrics should be referenced to native resolution
// TODO: Dropping the lower 2 bits from this count should be closer to actual
// hardware behavior when drawing triangles.
result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
// Adjust for multisampling
if (g_ActiveConfig.iMultisamples > 1)
result /= g_ActiveConfig.iMultisamples;
m_results[entry.query_type] += result;
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
--m_query_count;
}
// TODO: could selectively flush things, but I don't think that will do much
void PerfQueryGL::FlushResults()
{
while (!IsFlushed())
FlushOne();
}
PerfQueryGLESNV::PerfQueryGLESNV()
{
for (ActiveQuery& query : m_query_buffer)
glGenOcclusionQueriesNV(1, &query.query_id);
}
PerfQueryGLESNV::~PerfQueryGLESNV()
{
for (ActiveQuery& query : m_query_buffer)
glDeleteOcclusionQueriesNV(1, &query.query_id);
}
void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
{
// Is this sane?
if (m_query_count > m_query_buffer.size() / 2)
WeakFlush();
if (m_query_buffer.size() == m_query_count)
{
FlushOne();
// ERROR_LOG(VIDEO, "Flushed query buffer early!");
}
// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
glBeginOcclusionQueryNV(entry.query_id);
entry.query_type = type;
++m_query_count;
}
}
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
glEndOcclusionQueryNV();
}
}
void PerfQueryGLESNV::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = GL_FALSE;
glGetOcclusionQueryuivNV(entry.query_id, GL_PIXEL_COUNT_AVAILABLE_NV, &result);
if (GL_TRUE == result)
{
FlushOne();
}
else
{
break;
}
}
}
void PerfQueryGLESNV::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = 0;
glGetOcclusionQueryuivNV(entry.query_id, GL_OCCLUSION_TEST_RESULT_HP, &result);
// NOTE: Reported pixel metrics should be referenced to native resolution
// TODO: Dropping the lower 2 bits from this count should be closer to actual
// hardware behavior when drawing triangles.
m_results[entry.query_type] += static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
--m_query_count;
}
// TODO: could selectively flush things, but I don't think that will do much
void PerfQueryGLESNV::FlushResults()
{
while (!IsFlushed())
FlushOne();
}
} // namespace