mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-22 22:00:39 -06:00
VideoBackends:Metal: Implement PerfQuery
This commit is contained in:
@ -3,6 +3,9 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
#include "VideoCommon/PerfQueryBase.h"
|
#include "VideoCommon/PerfQueryBase.h"
|
||||||
|
|
||||||
namespace Metal
|
namespace Metal
|
||||||
@ -10,11 +13,22 @@ namespace Metal
|
|||||||
class PerfQuery final : public PerfQueryBase
|
/// Metal backend implementation of PerfQueryBase.
/// Results arrive asynchronously through ReturnResults, which may run on any thread.
class PerfQuery final : public PerfQueryBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
void EnableQuery(PerfQueryGroup type) override {}
|
/// Forwards PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP to the state tracker; other groups are ignored.
void EnableQuery(PerfQueryGroup type) override;
|
||||||
void DisableQuery(PerfQueryGroup type) override {}
|
/// Counterpart of EnableQuery for the same two groups.
void DisableQuery(PerfQueryGroup type) override;
|
||||||
void ResetQuery() override {}
|
/// Advances m_current_query and zeroes every entry of m_results, under m_results_mtx.
void ResetQuery() override;
|
||||||
u32 GetQueryResult(PerfQueryType type) override { return 0; }
|
/// Returns the accumulated count(s) for the group(s) backing `type`, or 0 for unhandled types.
u32 GetQueryResult(PerfQueryType type) override;
|
||||||
void FlushResults() override {}
|
/// Flushes the state tracker's encoders and blocks until IsFlushed() becomes true.
void FlushResults() override;
|
||||||
bool IsFlushed() const override { return true; }
|
/// True when m_query_count is zero, i.e. every IncCount has been matched by a ReturnResults.
bool IsFlushed() const override;
|
||||||
|
|
||||||
|
/// Notify PerfQuery of a new pending encoder
|
||||||
|
/// One call to ReturnResults should be made for every call to IncCount
|
||||||
|
void IncCount() { m_query_count.fetch_add(1, std::memory_order_relaxed); }
|
||||||
|
/// May be called from any thread
|
||||||
|
/// Adds `count` results (data[i] attributed to groups[i]) if `query_id` still matches the
|
/// current query, then decrements the pending count and wakes a waiter in FlushResults.
|
void ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count, u32 query_id);
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Id of the active query; incremented by ResetQuery and compared in ReturnResults.
|
u32 m_current_query = 0;
|
||||||
|
/// Held by ResetQuery, ReturnResults and the wait in FlushResults.
|
std::mutex m_results_mtx;
|
||||||
|
/// Notified by ReturnResults; waited on by FlushResults.
|
std::condition_variable m_cv;
|
||||||
};
|
};
|
||||||
} // namespace Metal
|
} // namespace Metal
|
||||||
|
@ -2,3 +2,89 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "VideoBackends/Metal/MTLPerfQuery.h"
|
#include "VideoBackends/Metal/MTLPerfQuery.h"
|
||||||
|
|
||||||
|
#include "VideoBackends/Metal/MTLStateTracker.h"
|
||||||
|
|
||||||
|
/// Starts counting for `type` on the state tracker.
/// Only the two Z-compare groups are forwarded; every other group is a no-op here.
void Metal::PerfQuery::EnableQuery(PerfQueryGroup type)
{
  const bool is_zcomp_group = (type == PQG_ZCOMP) || (type == PQG_ZCOMP_ZCOMPLOC);
  if (!is_zcomp_group)
    return;

  // Tag the tracker with the current query id so late results can be matched in ReturnResults.
  g_state_tracker->EnablePerfQuery(type, m_current_query);
}
|
||||||
|
|
||||||
|
/// Stops counting on the state tracker for the two Z-compare groups.
/// Any other group is ignored, mirroring EnableQuery.
void Metal::PerfQuery::DisableQuery(PerfQueryGroup type)
{
  switch (type)
  {
  case PQG_ZCOMP_ZCOMPLOC:
  case PQG_ZCOMP:
    g_state_tracker->DisablePerfQuery();
    break;
  default:
    break;
  }
}
|
||||||
|
|
||||||
|
void Metal::PerfQuery::ResetQuery()
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(m_results_mtx);
|
||||||
|
m_current_query++;
|
||||||
|
for (std::atomic<u32>& result : m_results)
|
||||||
|
result.store(0, std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the accumulated count for the requested query type.
/// Input/output variants of a Z-compare type read the same group counter, PQ_BLEND_INPUT is the
/// sum of both Z-compare groups, and any type not listed below yields 0.
u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
{
  const auto read_group = [this](PerfQueryGroup group) -> u32 {
    return m_results[group].load(std::memory_order_relaxed);
  };

  switch (type)
  {
  case PQ_ZCOMP_INPUT_ZCOMPLOC:
  case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
    return read_group(PQG_ZCOMP_ZCOMPLOC);

  case PQ_ZCOMP_INPUT:
  case PQ_ZCOMP_OUTPUT:
    return read_group(PQG_ZCOMP);

  case PQ_BLEND_INPUT:
    return read_group(PQG_ZCOMP) + read_group(PQG_ZCOMP_ZCOMPLOC);

  case PQ_EFB_COPY_CLOCKS:
    return read_group(PQG_EFB_COPY_CLOCKS);

  default:
    return 0;
  }
}
|
||||||
|
|
||||||
|
void Metal::PerfQuery::FlushResults()
|
||||||
|
{
|
||||||
|
if (IsFlushed())
|
||||||
|
return;
|
||||||
|
|
||||||
|
// There's a possibility that some active performance queries are unflushed
|
||||||
|
g_state_tracker->FlushEncoders();
|
||||||
|
|
||||||
|
std::unique_lock<std::mutex> lock(m_results_mtx);
|
||||||
|
while (!IsFlushed())
|
||||||
|
m_cv.wait(lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Metal::PerfQuery::IsFlushed() const
|
||||||
|
{
|
||||||
|
return m_query_count.load(std::memory_order_acquire) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
|
||||||
|
u32 query_id)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(m_results_mtx);
|
||||||
|
if (m_current_query == query_id)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < count; ++i)
|
||||||
|
{
|
||||||
|
u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) /
|
||||||
|
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
|
||||||
|
|
||||||
|
native_res_result /= g_ActiveConfig.iMultisamples;
|
||||||
|
|
||||||
|
m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_query_count.fetch_sub(1, std::memory_order_release);
|
||||||
|
}
|
||||||
|
m_cv.notify_one();
|
||||||
|
}
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include "VideoBackends/Metal/MTLTexture.h"
|
#include "VideoBackends/Metal/MTLTexture.h"
|
||||||
#include "VideoBackends/Metal/MTLUtil.h"
|
#include "VideoBackends/Metal/MTLUtil.h"
|
||||||
|
|
||||||
|
#include "VideoCommon/PerfQueryBase.h"
|
||||||
#include "VideoCommon/RenderBase.h"
|
#include "VideoCommon/RenderBase.h"
|
||||||
|
|
||||||
namespace Metal
|
namespace Metal
|
||||||
@ -90,6 +91,8 @@ public:
|
|||||||
void SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
|
void SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
|
||||||
/// Use around utility draws that are commonly used immediately before gx draws to the same buffer
|
/// Use around utility draws that are commonly used immediately before gx draws to the same buffer
|
||||||
void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; }
|
void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; }
|
||||||
|
void EnablePerfQuery(PerfQueryGroup group, u32 query_id);
|
||||||
|
void DisablePerfQuery();
|
||||||
void UnbindTexture(id<MTLTexture> texture);
|
void UnbindTexture(id<MTLTexture> texture);
|
||||||
|
|
||||||
void Draw(u32 base_vertex, u32 num_vertices);
|
void Draw(u32 base_vertex, u32 num_vertices);
|
||||||
@ -157,8 +160,10 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct Backref;
|
struct Backref;
|
||||||
|
struct PerfQueryTracker;
|
||||||
|
|
||||||
std::shared_ptr<Backref> m_backref;
|
std::shared_ptr<Backref> m_backref;
|
||||||
|
std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
|
||||||
MRCOwned<id<MTLFence>> m_fence;
|
MRCOwned<id<MTLFence>> m_fence;
|
||||||
MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
|
MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
|
||||||
MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
|
MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
|
||||||
@ -224,7 +229,9 @@ private:
|
|||||||
MTLDepthClipMode depth_clip_mode;
|
MTLDepthClipMode depth_clip_mode;
|
||||||
MTLCullMode cull_mode;
|
MTLCullMode cull_mode;
|
||||||
DepthStencilSelector depth_stencil;
|
DepthStencilSelector depth_stencil;
|
||||||
|
PerfQueryGroup perf_query_group;
|
||||||
} m_current;
|
} m_current;
|
||||||
|
std::shared_ptr<PerfQueryTracker> m_current_perf_query;
|
||||||
|
|
||||||
/// Things that represent what we'd *like* to have on the encoder for the next draw
|
/// Things that represent what we'd *like* to have on the encoder for the next draw
|
||||||
struct State
|
struct State
|
||||||
@ -250,8 +257,12 @@ private:
|
|||||||
id<MTLBuffer> texels = nullptr;
|
id<MTLBuffer> texels = nullptr;
|
||||||
u32 texel_buffer_offset0;
|
u32 texel_buffer_offset0;
|
||||||
u32 texel_buffer_offset1;
|
u32 texel_buffer_offset1;
|
||||||
|
PerfQueryGroup perf_query_group = static_cast<PerfQueryGroup>(-1);
|
||||||
} m_state;
|
} m_state;
|
||||||
|
|
||||||
|
u32 m_perf_query_tracker_counter = 0;
|
||||||
|
|
||||||
|
std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
|
||||||
void SetSamplerForce(u32 idx, const SamplerState& sampler);
|
void SetSamplerForce(u32 idx, const SamplerState& sampler);
|
||||||
void Sync(BufferPair& buffer);
|
void Sync(BufferPair& buffer);
|
||||||
Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
|
Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "Common/BitUtils.h"
|
#include "Common/BitUtils.h"
|
||||||
|
|
||||||
#include "VideoBackends/Metal/MTLObjectCache.h"
|
#include "VideoBackends/Metal/MTLObjectCache.h"
|
||||||
|
#include "VideoBackends/Metal/MTLPerfQuery.h"
|
||||||
#include "VideoBackends/Metal/MTLPipeline.h"
|
#include "VideoBackends/Metal/MTLPipeline.h"
|
||||||
#include "VideoBackends/Metal/MTLTexture.h"
|
#include "VideoBackends/Metal/MTLTexture.h"
|
||||||
#include "VideoBackends/Metal/MTLUtil.h"
|
#include "VideoBackends/Metal/MTLUtil.h"
|
||||||
@ -19,6 +20,8 @@
|
|||||||
#include "VideoCommon/VertexShaderManager.h"
|
#include "VideoCommon/VertexShaderManager.h"
|
||||||
#include "VideoCommon/VideoConfig.h"
|
#include "VideoCommon/VideoConfig.h"
|
||||||
|
|
||||||
|
static constexpr u32 PERF_QUERY_BUFFER_SIZE = 512;
|
||||||
|
|
||||||
std::unique_ptr<Metal::StateTracker> Metal::g_state_tracker;
|
std::unique_ptr<Metal::StateTracker> Metal::g_state_tracker;
|
||||||
|
|
||||||
struct Metal::StateTracker::Backref
|
struct Metal::StateTracker::Backref
|
||||||
@ -28,6 +31,14 @@ struct Metal::StateTracker::Backref
|
|||||||
explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
|
explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Bookkeeping for one visibility-result buffer and the counting slots recorded into it.
struct Metal::StateTracker::PerfQueryTracker
{
  /// GPU buffer set as the render pass's visibility result buffer.
  MRCOwned<id<MTLBuffer>> buffer;
  /// CPU pointer to `buffer`'s contents, read when results are returned.
  /// Initialized explicitly rather than relying on value-initialization by the creator.
  const u64* contents = nullptr;
  /// Group for each counting slot, in slot order; slot i lives at byte offset i * 8.
  std::vector<PerfQueryGroup> groups;
  /// Id of the query these slots belong to; results with a stale id are dropped on return.
  u32 query_id = 0;
};
|
||||||
|
|
||||||
static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
|
static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
|
||||||
{
|
{
|
||||||
// clang-format off
|
// clang-format off
|
||||||
@ -328,8 +339,12 @@ void Metal::StateTracker::BeginRenderPass(MTLLoadAction load_action)
|
|||||||
void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
|
void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
|
||||||
{
|
{
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
|
if (m_current_perf_query)
|
||||||
|
[descriptor setVisibilityResultBuffer:m_current_perf_query->buffer];
|
||||||
m_current_render_encoder =
|
m_current_render_encoder =
|
||||||
MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
|
MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
|
||||||
|
if (m_current_perf_query)
|
||||||
|
[descriptor setVisibilityResultBuffer:nil];
|
||||||
if (!g_features.unified_memory)
|
if (!g_features.unified_memory)
|
||||||
[m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
|
[m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
|
||||||
AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
|
AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
|
||||||
@ -347,6 +362,7 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
|
|||||||
m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always);
|
m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always);
|
||||||
m_current.depth_clip_mode = MTLDepthClipModeClip;
|
m_current.depth_clip_mode = MTLDepthClipModeClip;
|
||||||
m_current.cull_mode = MTLCullModeNone;
|
m_current.cull_mode = MTLCullModeNone;
|
||||||
|
m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
|
||||||
m_flags.NewEncoder();
|
m_flags.NewEncoder();
|
||||||
m_dirty_samplers = 0xff;
|
m_dirty_samplers = 0xff;
|
||||||
m_dirty_textures = 0xff;
|
m_dirty_textures = 0xff;
|
||||||
@ -411,15 +427,23 @@ void Metal::StateTracker::FlushEncoders()
|
|||||||
m_texture_upload_cmdbuf = nullptr;
|
m_texture_upload_cmdbuf = nullptr;
|
||||||
}
|
}
|
||||||
[m_current_render_cmdbuf
|
[m_current_render_cmdbuf
|
||||||
addCompletedHandler:[backref = m_backref, draw = m_current_draw](id<MTLCommandBuffer> buf) {
|
addCompletedHandler:[backref = m_backref, draw = m_current_draw,
|
||||||
|
q = std::move(m_current_perf_query)](id<MTLCommandBuffer> buf) {
|
||||||
std::lock_guard<std::mutex> guard(backref->mtx);
|
std::lock_guard<std::mutex> guard(backref->mtx);
|
||||||
if (StateTracker* tracker = backref->state_tracker)
|
if (StateTracker* tracker = backref->state_tracker)
|
||||||
{
|
{
|
||||||
// We can do the update non-atomically because we only ever update under the lock
|
// We can do the update non-atomically because we only ever update under the lock
|
||||||
u64 newval = std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed));
|
u64 newval = std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed));
|
||||||
tracker->m_last_finished_draw.store(newval, std::memory_order_release);
|
tracker->m_last_finished_draw.store(newval, std::memory_order_release);
|
||||||
|
if (q)
|
||||||
|
{
|
||||||
|
if (PerfQuery* query = static_cast<PerfQuery*>(g_perf_query.get()))
|
||||||
|
query->ReturnResults(q->contents, q->groups.data(), q->groups.size(), q->query_id);
|
||||||
|
tracker->m_perf_query_tracker_cache.emplace_back(std::move(q));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}];
|
}];
|
||||||
|
m_current_perf_query = nullptr;
|
||||||
[m_current_render_cmdbuf commit];
|
[m_current_render_cmdbuf commit];
|
||||||
m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
|
m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
|
||||||
m_current_render_cmdbuf = nullptr;
|
m_current_render_cmdbuf = nullptr;
|
||||||
@ -603,6 +627,57 @@ void Metal::StateTracker::SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u3
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Hands out a tracker with an empty group list, reusing a cached one when available.
/// Every call registers one pending result with PerfQuery via IncCount.
/// The caller is responsible for setting `query_id`; this function does not know it.
std::shared_ptr<Metal::StateTracker::PerfQueryTracker> Metal::StateTracker::NewPerfQueryTracker()
{
  static_cast<PerfQuery*>(g_perf_query.get())->IncCount();
  // The cache is repopulated asynchronously (from the command buffer completion handler),
  // so it must only be touched under the backref mutex.
  std::lock_guard<std::mutex> lock(m_backref->mtx);
  if (m_perf_query_tracker_cache.empty())
  {
    // Make a new one
    @autoreleasepool
    {
      std::shared_ptr<PerfQueryTracker> tracker = std::make_shared<PerfQueryTracker>();
      const MTLResourceOptions options =
          MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked;
      id<MTLBuffer> buffer = [g_device newBufferWithLength:PERF_QUERY_BUFFER_SIZE * sizeof(u64)
                                                   options:options];
      // The counter is a u32, so format it as unsigned.
      [buffer setLabel:[NSString stringWithFormat:@"PerfQuery Buffer %u",
                                                  m_perf_query_tracker_counter++]];
      tracker->buffer = MRCTransfer(buffer);
      tracker->contents = static_cast<const u64*>([buffer contents]);
      return tracker;
    }
  }
  else
  {
    // Reuse an old one
    std::shared_ptr<PerfQueryTracker> tracker = std::move(m_perf_query_tracker_cache.back());
    m_perf_query_tracker_cache.pop_back();
    // A cached tracker still holds the groups of its previous use. Slot offsets are derived
    // from groups.size(), so stale entries would shift new slots and be reported again.
    tracker->groups.clear();
    return tracker;
  }
}
|
||||||
|
|
||||||
|
/// Records `group` as the perf query group wanted for upcoming draws and makes sure the
/// current tracker is usable for `query_id`.
void Metal::StateTracker::EnablePerfQuery(PerfQueryGroup group, u32 query_id)
{
  m_state.perf_query_group = group;

  // The existing tracker can be kept only if it belongs to this query and has a free slot.
  const bool tracker_usable = m_current_perf_query &&
                              m_current_perf_query->query_id == query_id &&
                              m_current_perf_query->groups.size() != PERF_QUERY_BUFFER_SIZE;
  if (tracker_usable)
    return;

  // Close any open pass first; presumably so the next BeginRenderPass binds the tracker's
  // buffer from a clean state.
  if (m_current_render_encoder)
    EndRenderPass();

  if (!m_current_perf_query)
    m_current_perf_query = NewPerfQueryTracker();

  // NOTE(review): when the tracker is full but query_id is unchanged, this clears the groups
  // already recorded on the same tracker. Those slots then look unreported - confirm intended.
  m_current_perf_query->groups.clear();
  m_current_perf_query->query_id = query_id;
}
|
||||||
|
|
||||||
|
void Metal::StateTracker::DisablePerfQuery()
|
||||||
|
{
|
||||||
|
m_state.perf_query_group = static_cast<PerfQueryGroup>(-1);
|
||||||
|
}
|
||||||
|
|
||||||
// MARK: Render
|
// MARK: Render
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
@ -620,6 +695,9 @@ static NSRange RangeOfBits(u32 value)
|
|||||||
|
|
||||||
void Metal::StateTracker::PrepareRender()
|
void Metal::StateTracker::PrepareRender()
|
||||||
{
|
{
|
||||||
|
// BeginRenderPass needs this
|
||||||
|
if (m_state.perf_query_group != static_cast<PerfQueryGroup>(-1) && !m_current_perf_query)
|
||||||
|
m_current_perf_query = NewPerfQueryTracker();
|
||||||
if (!m_current_render_encoder)
|
if (!m_current_render_encoder)
|
||||||
BeginRenderPass(MTLLoadActionLoad);
|
BeginRenderPass(MTLLoadActionLoad);
|
||||||
id<MTLRenderCommandEncoder> enc = m_current_render_encoder;
|
id<MTLRenderCommandEncoder> enc = m_current_render_encoder;
|
||||||
@ -710,6 +788,20 @@ void Metal::StateTracker::PrepareRender()
|
|||||||
lodMaxClamps:m_state.sampler_max_lod.data()
|
lodMaxClamps:m_state.sampler_max_lod.data()
|
||||||
withRange:range];
|
withRange:range];
|
||||||
}
|
}
|
||||||
|
if (m_state.perf_query_group != m_current.perf_query_group)
|
||||||
|
{
|
||||||
|
m_current.perf_query_group = m_state.perf_query_group;
|
||||||
|
if (m_state.perf_query_group == static_cast<PerfQueryGroup>(-1))
|
||||||
|
{
|
||||||
|
[enc setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
[enc setVisibilityResultMode:MTLVisibilityResultModeCounting
|
||||||
|
offset:m_current_perf_query->groups.size() * 8];
|
||||||
|
m_current_perf_query->groups.push_back(m_state.perf_query_group);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (is_gx)
|
if (is_gx)
|
||||||
{
|
{
|
||||||
// GX draw
|
// GX draw
|
||||||
|
Reference in New Issue
Block a user