diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.cpp b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp index 98e24d934c..4b2ec079ee 100644 --- a/Source/Core/VideoBackends/D3D12/PerfQuery.cpp +++ b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp @@ -57,6 +57,12 @@ void PerfQuery::EnableQuery(PerfQueryGroup type) PartialFlush(do_resolve, blocking); } + // Ensure all state is applied before beginning the query. + // This is because we can't leave a query open when submitting a command list, and the draw + // call itself may need to execute a command list if we run out of descriptors. Note that + // this assumes that the caller has bound all required state prior to enabling the query. + Renderer::GetInstance()->ApplyState(); + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) { ActiveQuery& entry = m_query_buffer[m_query_next_pos]; @@ -152,9 +158,9 @@ void PerfQuery::ResolveQueries(u32 query_count) m_unresolved_queries -= query_count; } -void PerfQuery::ReadbackQueries() +void PerfQuery::ReadbackQueries(bool blocking) { - const u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue(); + u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue(); // Need to save these since ProcessResults will modify them. const u32 outstanding_queries = m_query_count; @@ -163,13 +169,24 @@ void PerfQuery::ReadbackQueries() { u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE; const ActiveQuery& entry = m_query_buffer[index]; - if (!entry.resolved || entry.fence_value > completed_fence_counter) + if (!entry.resolved) break; + if (entry.fence_value > completed_fence_counter) + { + // Query result isn't ready yet. Wait if blocking, otherwise we can't do any more yet. + if (!blocking) + break; + + ASSERT(entry.fence_value != g_dx_context->GetCurrentFenceValue()); + g_dx_context->WaitForFence(entry.fence_value); + completed_fence_counter = g_dx_context->GetCompletedFenceValue(); + } + // If this wrapped around, we need to flush the entries before the end of the buffer. if (index < m_query_readback_pos) { - ReadbackQueries(readback_count); + AccumulateQueriesFromBuffer(readback_count); DEBUG_ASSERT(m_query_readback_pos == 0); readback_count = 0; } @@ -178,10 +195,10 @@ void PerfQuery::ReadbackQueries() } if (readback_count > 0) - ReadbackQueries(readback_count); + AccumulateQueriesFromBuffer(readback_count); } -void PerfQuery::ReadbackQueries(u32 query_count) +void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count) { // Should be at maximum query_count queries pending. ASSERT(query_count <= m_query_count && @@ -226,10 +243,10 @@ void PerfQuery::ReadbackQueries(u32 query_count) void PerfQuery::PartialFlush(bool resolve, bool blocking) { - // Submit a command buffer in the background if the front query is not bound to one. - if ((resolve || blocking) && !m_query_buffer[m_query_resolve_pos].resolved) - Renderer::GetInstance()->ExecuteCommandList(blocking); + // Submit a command buffer if there are unresolved queries (to write them to the buffer). + if (resolve && m_unresolved_queries > 0) + Renderer::GetInstance()->ExecuteCommandList(false); - ReadbackQueries(); + ReadbackQueries(blocking); } } // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.h b/Source/Core/VideoBackends/D3D12/PerfQuery.h index c21436652e..2173519a2c 100644 --- a/Source/Core/VideoBackends/D3D12/PerfQuery.h +++ b/Source/Core/VideoBackends/D3D12/PerfQuery.h @@ -38,8 +38,8 @@ private: }; void ResolveQueries(u32 query_count); - void ReadbackQueries(); - void ReadbackQueries(u32 query_count); + void ReadbackQueries(bool blocking); + void AccumulateQueriesFromBuffer(u32 query_count); void PartialFlush(bool resolve, bool blocking); diff --git a/Source/Core/VideoBackends/D3D12/Renderer.h b/Source/Core/VideoBackends/D3D12/Renderer.h index 60e0000cff..1979833048 100644 --- a/Source/Core/VideoBackends/D3D12/Renderer.h +++ b/Source/Core/VideoBackends/D3D12/Renderer.h @@ -89,6 +89,9 @@ public: void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size); void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format); + // Binds all dirty state + bool ApplyState(); + protected: void OnConfigChanged(u32 bits) override; @@ -131,8 +134,6 @@ private: void CheckForSwapChainChanges(); - // Binds all dirty state - bool ApplyState(); void BindFramebuffer(DXFramebuffer* fb); void SetRootSignatures(); void SetDescriptorHeaps(); diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp index f19b67d12b..e13c02173d 100644 --- a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp @@ -45,6 +45,10 @@ void PerfQuery::EnableQuery(PerfQueryGroup type) if (m_query_count > m_query_buffer.size() / 2) PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE); + // Ensure command buffer is ready to go before beginning the query, that way we don't submit + // a buffer with open queries. + StateTracker::GetInstance()->Bind(); + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) { ActiveQuery& entry = m_query_buffer[m_query_next_pos]; diff --git a/Source/Core/VideoCommon/PerfQueryBase.h b/Source/Core/VideoCommon/PerfQueryBase.h index 449b86cc1b..de99b215c9 100644 --- a/Source/Core/VideoCommon/PerfQueryBase.h +++ b/Source/Core/VideoCommon/PerfQueryBase.h @@ -31,22 +31,31 @@ class PerfQueryBase public: PerfQueryBase() : m_query_count(0) {} virtual ~PerfQueryBase() {} + // Checks if performance queries are enabled in the gameini configuration. // NOTE: Called from CPU+GPU thread static bool ShouldEmulate(); // Begin querying the specified value for the following host GPU commands + // The call to EnableQuery() should be placed immediately before the draw command, otherwise + // there is a risk of GPU resets if the query is left open and the buffer is submitted during + // resource binding (D3D12/Vulkan). virtual void EnableQuery(PerfQueryGroup type) {} + // Stop querying the specified value for the following host GPU commands virtual void DisableQuery(PerfQueryGroup type) {} + // Reset query counters to zero and drop any pending queries virtual void ResetQuery() {} + // Return the measured value for the specified query type // NOTE: Called from CPU thread virtual u32 GetQueryResult(PerfQueryType type) { return 0; } + // Request the value of any pending queries - causes a pipeline flush and thus should be used // carefully! virtual void FlushResults() {} + // True if there are no further pending query results // NOTE: Called from CPU thread virtual bool IsFlushed() const { return true; }