Remove all remaining volatile qualifiers

2025-07-23 14:19:46 -06:00 · 2021-05-13 19:30:30 +02:00
parent 41befc21cd
commit 8a0f5ea04a
11 changed files with 229 additions and 178 deletions
--- a/Source/Core/VideoBackends/D3D/D3DPerfQuery.cpp
+++ b/Source/Core/VideoBackends/D3D/D3DPerfQuery.cpp
@ -27,11 +27,11 @@ PerfQuery::~PerfQuery() = default;

 void PerfQuery::EnableQuery(PerfQueryGroup type)
 {
  // Is this sane?
-  if (m_query_count > m_query_buffer.size() / 2)
+  if (m_query_count.load(std::memory_order_relaxed) > m_query_buffer.size() / 2)
    WeakFlush();

-  if (m_query_buffer.size() == m_query_count)
+  if (m_query_buffer.size() == m_query_count.load(std::memory_order_relaxed))
  {
    // TODO
    FlushOne();
@ -41,12 +41,14 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
  // start query
  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
  {
-    auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
+    // Read the count only now: WeakFlush()/FlushOne() above may have changed it.
+    const u32 query_count = m_query_count.load(std::memory_order_relaxed);
+    auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

    D3D::context->Begin(entry.query.Get());
    entry.query_type = type;

-    ++m_query_count;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
  }
 }

@ -55,7 +57,8 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
  // stop query
  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
  {
-    auto& entry = m_query_buffer[(m_query_read_pos + m_query_count + m_query_buffer.size() - 1) %
+    auto& entry = m_query_buffer[(m_query_read_pos + m_query_count.load(std::memory_order_relaxed) +
+                                  m_query_buffer.size() - 1) %
                                 m_query_buffer.size()];
    D3D::context->End(entry.query.Get());
  }
@ -63,8 +66,9 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)

 void PerfQuery::ResetQuery()
 {
-  m_query_count = 0;
-  std::fill(std::begin(m_results), std::end(m_results), 0);
+  m_query_count.store(0, std::memory_order_relaxed);
+  for (size_t i = 0; i < m_results.size(); ++i)
+    m_results[i].store(0, std::memory_order_relaxed);
 }

 u32 PerfQuery::GetQueryResult(PerfQueryType type)
@ -72,13 +76,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
  u32 result = 0;

  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-    result = m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-    result = m_results[PQG_ZCOMP];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_BLEND_INPUT)
-    result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
+             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_EFB_COPY_CLOCKS)
-    result = m_results[PQG_EFB_COPY_CLOCKS];
+  {
+    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
+  }

  return result;
 }
@ -98,11 +111,13 @@ void PerfQuery::FlushOne()
  // NOTE: Reported pixel metrics should be referenced to native resolution
  // TODO: Dropping the lower 2 bits from this count should be closer to actual
  // hardware behavior when drawing triangles.
-  m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
-                                       EFB_HEIGHT / g_renderer->GetTargetHeight());
+  const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
+                                g_renderer->GetTargetHeight();
+  m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
+                                        std::memory_order_relaxed);

  m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
-  --m_query_count;
+  m_query_count.fetch_sub(1, std::memory_order_relaxed);
 }

 // TODO: could selectively flush things, but I don't think that will do much
@ -125,11 +140,13 @@ void PerfQuery::WeakFlush()
    if (hr == S_OK)
    {
      // NOTE: Reported pixel metrics should be referenced to native resolution
-      m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
-                                           EFB_HEIGHT / g_renderer->GetTargetHeight());
+      const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
+                                    g_renderer->GetTargetHeight();
+      m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
+                                            std::memory_order_relaxed);

      m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
-      --m_query_count;
+      m_query_count.fetch_sub(1, std::memory_order_relaxed);
    }
    else
    {
@ -140,7 +157,7 @@ void PerfQuery::WeakFlush()

 bool PerfQuery::IsFlushed() const
 {
-  return 0 == m_query_count;
+  return m_query_count.load(std::memory_order_relaxed) == 0;
 }

 }  // namespace DX11
--- a/Source/Core/VideoBackends/D3D12/D3D12PerfQuery.cpp
+++ b/Source/Core/VideoBackends/D3D12/D3D12PerfQuery.cpp
@ -52,10 +52,11 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
 {
  // Block if there are no free slots.
  // Otherwise, try to keep half of them available.
-  if (m_query_count > m_query_buffer.size() / 2)
+  const u32 query_count = m_query_count.load(std::memory_order_relaxed);
+  if (query_count > m_query_buffer.size() / 2)
  {
    const bool do_resolve = m_unresolved_queries > m_query_buffer.size() / 2;
-    const bool blocking = m_query_count == PERF_QUERY_BUFFER_SIZE;
+    const bool blocking = query_count == PERF_QUERY_BUFFER_SIZE;
    PartialFlush(do_resolve, blocking);
  }

@ -83,19 +84,20 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
    g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
                                             m_query_next_pos);
    m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
-    m_query_count++;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
    m_unresolved_queries++;
  }
 }

 void PerfQuery::ResetQuery()
 {
-  m_query_count = 0;
+  m_query_count.store(0, std::memory_order_relaxed);
  m_unresolved_queries = 0;
  m_query_resolve_pos = 0;
  m_query_readback_pos = 0;
  m_query_next_pos = 0;
-  std::fill(std::begin(m_results), std::end(m_results), 0);
+  for (size_t i = 0; i < m_results.size(); ++i)
+    m_results[i].store(0, std::memory_order_relaxed);
  for (auto& entry : m_query_buffer)
  {
    entry.fence_value = 0;
@ -108,13 +110,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
 {
  u32 result = 0;
  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-    result = m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-    result = m_results[PQG_ZCOMP];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_BLEND_INPUT)
-    result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
+             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_EFB_COPY_CLOCKS)
-    result = m_results[PQG_EFB_COPY_CLOCKS];
+  {
+    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
+  }

  return result / 4;
 }
@ -127,7 +138,7 @@ void PerfQuery::FlushResults()

 bool PerfQuery::IsFlushed() const
 {
-  return m_query_count == 0;
+  return m_query_count.load(std::memory_order_relaxed) == 0;
 }

 void PerfQuery::ResolveQueries()
@ -165,7 +176,7 @@ void PerfQuery::ReadbackQueries(bool blocking)
  u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue();

  // Need to save these since ProcessResults will modify them.
-  const u32 outstanding_queries = m_query_count;
+  const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed);
  u32 readback_count = 0;
  for (u32 i = 0; i < outstanding_queries; i++)
  {
@ -203,7 +214,7 @@ void PerfQuery::ReadbackQueries(bool blocking)
 void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
 {
  // Should be at maximum query_count queries pending.
-  ASSERT(query_count <= m_query_count &&
+  ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) &&
         (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

  const D3D12_RANGE read_range = {m_query_readback_pos * sizeof(PerfQueryDataType),
@ -231,16 +242,18 @@ void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
    std::memcpy(&result, mapped_ptr + (index * sizeof(PerfQueryDataType)), sizeof(result));

    // NOTE: Reported pixel metrics should be referenced to native resolution
-    m_results[entry.query_type] +=
-        static_cast<u32>(static_cast<u64>(result) * EFB_WIDTH / g_renderer->GetTargetWidth() *
-                         EFB_HEIGHT / g_renderer->GetTargetHeight());
+    const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH /
+                                  g_renderer->GetTargetWidth() * EFB_HEIGHT /
+                                  g_renderer->GetTargetHeight();
+    m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
+                                          std::memory_order_relaxed);
  }

  constexpr D3D12_RANGE write_range = {0, 0};
  m_query_readback_buffer->Unmap(0, &write_range);

  m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
-  m_query_count -= query_count;
+  m_query_count.fetch_sub(query_count, std::memory_order_relaxed);
 }

 void PerfQuery::PartialFlush(bool resolve, bool blocking)
--- a/Source/Core/VideoBackends/OGL/OGLPerfQuery.cpp
+++ b/Source/Core/VideoBackends/OGL/OGLPerfQuery.cpp
@ -43,7 +43,7 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)

 bool PerfQuery::IsFlushed() const
 {
-  return 0 == m_query_count;
+  return m_query_count.load(std::memory_order_relaxed) == 0;
 }

 // TODO: could selectively flush things, but I don't think that will do much
@ -54,8 +54,9 @@ void PerfQuery::FlushResults()

 void PerfQuery::ResetQuery()
 {
-  m_query_count = 0;
-  std::fill(std::begin(m_results), std::end(m_results), 0);
+  m_query_count.store(0, std::memory_order_relaxed);
+  for (size_t i = 0; i < m_results.size(); ++i)
+    m_results[i].store(0, std::memory_order_relaxed);
 }

 u32 PerfQuery::GetQueryResult(PerfQueryType type)
@ -64,19 +65,20 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)

  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
  {
-    result = m_results[PQG_ZCOMP_ZCOMPLOC];
+    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
  }
  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
  {
-    result = m_results[PQG_ZCOMP];
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
  }
  else if (type == PQ_BLEND_INPUT)
  {
-    result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
+             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
  }
  else if (type == PQ_EFB_COPY_CLOCKS)
  {
-    result = m_results[PQG_EFB_COPY_CLOCKS];
+    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
  }

  return result;
@ -97,11 +99,11 @@ PerfQueryGL::~PerfQueryGL()

 void PerfQueryGL::EnableQuery(PerfQueryGroup type)
 {
  // Is this sane?
-  if (m_query_count > m_query_buffer.size() / 2)
+  if (m_query_count.load(std::memory_order_relaxed) > m_query_buffer.size() / 2)
    WeakFlush();

-  if (m_query_buffer.size() == m_query_count)
+  if (m_query_buffer.size() == m_query_count.load(std::memory_order_relaxed))
  {
    FlushOne();
    // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
@ -110,12 +112,14 @@ void PerfQueryGL::EnableQuery(PerfQueryGroup type)
  // start query
  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
  {
-    auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
+    // Read the count only now: the flushes above may have retired queries.
+    const u32 query_count = m_query_count.load(std::memory_order_relaxed);
+    auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

    glBeginQuery(m_query_type, entry.query_id);
    entry.query_type = type;

-    ++m_query_count;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
  }
 }
 void PerfQueryGL::DisableQuery(PerfQueryGroup type)
@ -164,10 +168,10 @@ void PerfQueryGL::FlushOne()
  if (g_ActiveConfig.iMultisamples > 1)
    result /= g_ActiveConfig.iMultisamples;

-  m_results[entry.query_type] += result;
+  m_results[entry.query_type].fetch_add(result, std::memory_order_relaxed);

  m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
-  --m_query_count;
+  m_query_count.fetch_sub(1, std::memory_order_relaxed);
 }

 // TODO: could selectively flush things, but I don't think that will do much
@ -191,11 +195,11 @@ PerfQueryGLESNV::~PerfQueryGLESNV()

 void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
 {
  // Is this sane?
-  if (m_query_count > m_query_buffer.size() / 2)
+  if (m_query_count.load(std::memory_order_relaxed) > m_query_buffer.size() / 2)
    WeakFlush();

-  if (m_query_buffer.size() == m_query_count)
+  if (m_query_buffer.size() == m_query_count.load(std::memory_order_relaxed))
  {
    FlushOne();
    // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
@ -204,12 +208,13 @@ void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
  // start query
  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
  {
-    auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
+    const u32 query_count = m_query_count.load(std::memory_order_relaxed);  // after flushes
+    auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

    glBeginOcclusionQueryNV(entry.query_id);
    entry.query_type = type;

-    ++m_query_count;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
  }
 }
 void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
@ -251,11 +256,13 @@ void PerfQueryGLESNV::FlushOne()
  // NOTE: Reported pixel metrics should be referenced to native resolution
  // TODO: Dropping the lower 2 bits from this count should be closer to actual
  // hardware behavior when drawing triangles.
-  m_results[entry.query_type] += static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
-                                 (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
+  const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
+                                (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
+  m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
+                                        std::memory_order_relaxed);

  m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
-  --m_query_count;
+  m_query_count.fetch_sub(1, std::memory_order_relaxed);
 }

 // TODO: could selectively flush things, but I don't think that will do much
--- a/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp
+++ b/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp
@ -43,8 +43,9 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
 {
  // Block if there are no free slots.
  // Otherwise, try to keep half of them available.
-  if (m_query_count > m_query_buffer.size() / 2)
-    PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE);
+  const u32 query_count = m_query_count.load(std::memory_order_relaxed);
+  if (query_count > m_query_buffer.size() / 2)
+    PartialFlush(query_count == PERF_QUERY_BUFFER_SIZE);

  // Ensure command buffer is ready to go before beginning the query, that way we don't submit
  // a buffer with open queries.
@ -73,16 +74,17 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
  {
    vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
    m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
-    m_query_count++;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
  }
 }

 void PerfQuery::ResetQuery()
 {
-  m_query_count = 0;
+  m_query_count.store(0, std::memory_order_relaxed);
  m_query_readback_pos = 0;
  m_query_next_pos = 0;
-  std::fill(std::begin(m_results), std::end(m_results), 0);
+  for (size_t i = 0; i < m_results.size(); ++i)
+    m_results[i].store(0, std::memory_order_relaxed);

  // Reset entire query pool, ensuring all queries are ready to write to.
  StateTracker::GetInstance()->EndRenderPass();
@ -96,13 +98,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
 {
  u32 result = 0;
  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-    result = m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-    result = m_results[PQG_ZCOMP];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_BLEND_INPUT)
-    result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
+             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
  else if (type == PQ_EFB_COPY_CLOCKS)
-    result = m_results[PQG_EFB_COPY_CLOCKS];
+  {
+    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
+  }

  return result / 4;
 }
@ -115,7 +126,7 @@ void PerfQuery::FlushResults()

 bool PerfQuery::IsFlushed() const
 {
-  return m_query_count == 0;
+  return m_query_count.load(std::memory_order_relaxed) == 0;
 }

 bool PerfQuery::CreateQueryPool()
@ -144,7 +155,7 @@ void PerfQuery::ReadbackQueries()
  const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter();

  // Need to save these since ProcessResults will modify them.
-  const u32 outstanding_queries = m_query_count;
+  const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed);
  u32 readback_count = 0;
  for (u32 i = 0; i < outstanding_queries; i++)
  {
@ -171,7 +182,7 @@ void PerfQuery::ReadbackQueries()
 void PerfQuery::ReadbackQueries(u32 query_count)
 {
  // Should be at maximum query_count queries pending.
-  ASSERT(query_count <= m_query_count &&
+  ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) &&
         (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

  // Read back from the GPU.
@ -194,13 +205,15 @@ void PerfQuery::ReadbackQueries(u32 query_count)
    entry.has_value = false;

    // NOTE: Reported pixel metrics should be referenced to native resolution
-    m_results[entry.query_type] +=
-        static_cast<u32>(static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
-                         g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight());
+    const u64 native_res_result = static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
+                                  g_renderer->GetTargetWidth() * EFB_HEIGHT /
+                                  g_renderer->GetTargetHeight();
+    m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
+                                          std::memory_order_relaxed);
  }

  m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
-  m_query_count -= query_count;
+  m_query_count.fetch_sub(query_count, std::memory_order_relaxed);
 }

 void PerfQuery::PartialFlush(bool blocking)