Fix OGL perf queries and make them not slow!

This commit is contained in:
Jordan Woyak
2013-02-16 17:50:40 -06:00
parent 54947b1e22
commit 53aec6c476
9 changed files with 178 additions and 43 deletions

View File

@ -1,31 +1,47 @@
#include "GLUtil.h"
#include "PerfQuery.h"
namespace OGL {
u32 results[PQG_NUM_MEMBERS] = { 0 };
GLuint query_id;
PerfQueryGroup active_query;
namespace OGL
{
// Sets up the query ring buffer: value-initializes the read position and the
// pending-query count, creates GL query objects with glGenQueries, and then
// calls ResetQuery().
PerfQuery::PerfQuery()
: m_query_read_pos()
, m_query_count()
{
// NOTE(review): this call on a lone `query_id` sits next to the per-slot loop
// below. The listing looks like a diff rendered without +/- markers, so this is
// presumably the superseded line -- confirm against the real file.
glGenQueries(1, &query_id);
// One GL query object is generated for each slot of m_query_buffer.
for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
glGenQueries(1, &m_query_buffer[i].query_id);
ResetQuery();
}
// Releases the GL query objects with glDeleteQueries.
PerfQuery::~PerfQuery()
{
// NOTE(review): deletion of a lone `query_id` appears next to the per-slot loop
// below; presumably the superseded line of an unmarked diff -- confirm.
glDeleteQueries(1, &query_id);
// Deletes the query object held by each slot of m_query_buffer.
for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
glDeleteQueries(1, &m_query_buffer[i].query_id);
}
// Begins a GL_SAMPLES_PASSED query for `type` when it is one of the two ZCOMP
// groups, after first making room in the query ring buffer if needed.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
// Is this sane?
// Once more than half of the slots are pending, collect whatever results are
// already available via WeakFlush().
if (m_query_count > ARRAYSIZE(m_query_buffer) / 2)
WeakFlush();
// Every slot is pending: retire the oldest one with FlushOne() so a slot is free.
if (ARRAYSIZE(m_query_buffer) == m_query_count)
{
FlushOne();
//ERROR_LOG(VIDEO, "flushed query buffer early!");
}
// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
// NOTE(review): this begin call on a lone `query_id` precedes the begin call on
// the ring-buffer entry below; presumably the superseded line of an unmarked
// diff -- confirm.
glBeginQuery(GL_SAMPLES_PASSED, query_id);
// The slot used is the one just past the pending range: read position plus
// pending count, wrapped to the buffer size.
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % ARRAYSIZE(m_query_buffer)];
glBeginQuery(GL_SAMPLES_PASSED, entry.query_id);
// Remember the group so the result can be added to the matching m_results slot.
entry.query_type = type;
++m_query_count;
}
// NOTE(review): `active_query` is not referenced by the ring-buffer code in this
// listing; presumably a leftover line from the pre-change version -- confirm.
active_query = type;
}
void PerfQuery::DisableQuery(PerfQueryGroup type)
@ -34,45 +50,82 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
glEndQuery(GL_SAMPLES_PASSED);
}
}
GLuint query_result = GL_FALSE;
while (query_result != GL_TRUE)
// Reports whether the pending-query count is zero, i.e. no issued query is
// still waiting to have its result collected.
bool PerfQuery::IsFlushed() const
{
    const bool nothing_pending = (m_query_count == 0);
    return nothing_pending;
}
void PerfQuery::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = 0;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result);
m_results[entry.query_type] += result;
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}
// Collects every pending query result, one FlushOne() at a time, until
// IsFlushed() reports that nothing is pending.
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
    for (;;)
    {
        if (IsFlushed())
            break;

        FlushOne();
    }
}
// Opportunistic drain: starting at the oldest pending query, asks GL whether
// its result is available (GL_QUERY_RESULT_AVAILABLE) and, if so, collects it
// with FlushOne(). Stops at the first query whose result is not yet available,
// or once nothing is pending.
void PerfQuery::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = GL_FALSE;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result);
if (GL_TRUE == result)
{
// NOTE(review): this poll of a lone `query_id` into `query_result` does not
// match the locals of this function; presumably a superseded line of an
// unmarked diff -- confirm.
glGetQueryObjectuiv(query_id, GL_QUERY_RESULT_AVAILABLE, &query_result);
// The result was reported available, so the fetch in FlushOne() should not
// have to wait.
FlushOne();
}
else
{
// Queries are retired oldest-first, so stop at the first one that is not ready.
break;
}
// NOTE(review): the two lines below use `query_id`, `query_result`, `results`
// and `active_query`, none of which belong to the ring-buffer code here;
// presumably superseded lines of an unmarked diff -- confirm.
glGetQueryObjectuiv(query_id, GL_QUERY_RESULT, &query_result);
results[active_query] += query_result;
}
}
// Clears the accumulated results and sets the pending-query count to zero.
// The read position is left where it is.
void PerfQuery::ResetQuery()
{
// NOTE(review): this clears a `results` array, while the fill below clears
// `m_results`; presumably the superseded line of an unmarked diff -- confirm.
memset(results, 0, sizeof(results));
// Pending queries are dropped without their results being read.
m_query_count = 0;
std::fill_n(m_results, ARRAYSIZE(m_results), 0);
}
// Maps a PerfQueryType onto the accumulated per-group totals in m_results and
// returns the selected value divided by 4. Types that match none of the
// branches leave `result` at 0.
// NOTE(review): this listing interleaves plain `if (...)` conditions and
// accesses to a `results` array with the `else if` / `m_results` chain; they
// are presumably superseded lines of an unmarked diff -- confirm against the
// real file before relying on the exact control flow.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC || type == PQ_BLEND_INPUT)
u32 result = 0;
// Both ZCOMPLOC input and output read the PQG_ZCOMP_ZCOMPLOC total.
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
{
result = m_results[PQG_ZCOMP_ZCOMPLOC];
}
if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT || type == PQ_BLEND_INPUT)
// Both ZCOMP input and output read the PQG_ZCOMP total.
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
{
result = m_results[PQG_ZCOMP];
}
if (type == PQ_BLEND_INPUT)
// Blend input is reported as the sum of the two ZCOMP group totals.
else if (type == PQ_BLEND_INPUT)
{
results[PQ_BLEND_INPUT] = results[PQ_ZCOMP_OUTPUT] + results[PQ_ZCOMP_OUTPUT_ZCOMPLOC];
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
}
if (type == PQ_EFB_COPY_CLOCKS)
else if (type == PQ_EFB_COPY_CLOCKS)
{
// TODO
result = m_results[PQG_EFB_COPY_CLOCKS];
}
return results[type];
// NOTE(review): the reason for scaling by 1/4 is not stated in this file --
// TODO confirm and document it.
return result / 4;
}
} // namespace

View File

@ -15,6 +15,30 @@ public:
void DisableQuery(PerfQueryGroup type);
void ResetQuery();
u32 GetQueryResult(PerfQueryType type);
void FlushResults();
bool IsFlushed() const;
private:
// One slot of the query ring buffer: a GL query object paired with the perf
// query group it was last started for.
struct ActiveQuery
{
// GL query object name, filled in by glGenQueries.
GLuint query_id;
// Group recorded when the query is started; selects which m_results entry
// the query's result is added to.
PerfQueryGroup query_type;
};
// Number of slots in the query ring buffer.
// when testing in SMS: 64 was too small, 128 was ok
static const int PERF_QUERY_BUFFER_SIZE = 512;
// Collects results that GL already reports as available; stops at the first
// pending query that is not ready.
void WeakFlush();
// Only use when non-empty
// Collects the oldest pending query's result and retires its slot.
void FlushOne();
// This contains gl query objects with unretrieved results.
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
// Index of the oldest pending query in m_query_buffer.
int m_query_read_pos;
// TODO: sloppy
// Number of pending queries, starting at m_query_read_pos.
// NOTE(review): `volatile` gives neither atomicity nor ordering; if these two
// members are really shared between threads, std::atomic or a lock is
// presumably what is wanted -- confirm the threading model.
volatile int m_query_count;
// Accumulated query results, indexed by PerfQueryGroup.
volatile u32 m_results[PQG_NUM_MEMBERS];
};
} // namespace

View File

@ -211,7 +211,7 @@ void VertexManager::vFlush()
g_perf_query->EnableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
Draw();
g_perf_query->DisableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT));
//ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT));
// run through vertex groups again to set alpha
if (useDstAlpha && !dualSourcePossible)

View File

@ -176,6 +176,12 @@ u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputDa
return value;
}
// Stub for the software backend: returns 0 for every query type.
u32 VideoSoftware::Video_GetQueryResult(PerfQueryType type)
{
    // TODO:
    const u32 unimplemented_result = 0;
    return unimplemented_result;
}
bool VideoSoftware::Video_Screenshot(const char *_szFilename)
{
return false;

View File

@ -26,7 +26,9 @@ class VideoSoftware : public VideoBackend
void Video_ExitLoop();
void Video_BeginField(u32, FieldType, u32, u32);
void Video_EndField();
u32 Video_AccessEFB(EFBAccessType, u32, u32, u32);
u32 Video_GetQueryResult(PerfQueryType type);
void Video_AddMessage(const char* pstr, unsigned int milliseconds);
void Video_ClearMessages();