From b93983b50a8b2d50a2120601784aab022f46308c Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 13 May 2021 18:44:59 +0200 Subject: [PATCH] Remove Atomic.h The STL has everything we need nowadays. I have tried to not alter any behavior or semantics with this change wherever possible. In particular, WriteLow and WriteHigh in CommandProcessor retain the ability to accidentally undo another thread's write to the upper half or lower half respectively. If that should be fixed, it should be done in a separate commit for clarity. One thing did change: The places where we were using += on a volatile variable (not an atomic operation) are now using fetch_add (actually an atomic operation). Tested with single core and dual core on x86-64 and AArch64. --- Source/Core/Common/Atomic.h | 16 -- Source/Core/Common/Atomic_GCC.h | 86 -------- Source/Core/Common/Atomic_Win32.h | 94 --------- Source/Core/Common/CMakeLists.txt | 1 - Source/Core/Common/ChunkFile.h | 4 +- Source/Core/Core/HW/MMIO.h | 12 +- Source/Core/Core/HW/SystemTimers.cpp | 1 - Source/Core/DolphinLib.props | 3 - Source/Core/VideoBackends/OGL/OGLRender.cpp | 1 - Source/Core/VideoCommon/CommandProcessor.cpp | 211 +++++++++++-------- Source/Core/VideoCommon/CommandProcessor.h | 16 +- Source/Core/VideoCommon/Fifo.cpp | 48 +++-- Source/Core/VideoCommon/RenderBase.cpp | 4 +- 13 files changed, 178 insertions(+), 319 deletions(-) delete mode 100644 Source/Core/Common/Atomic.h delete mode 100644 Source/Core/Common/Atomic_GCC.h delete mode 100644 Source/Core/Common/Atomic_Win32.h diff --git a/Source/Core/Common/Atomic.h b/Source/Core/Common/Atomic.h deleted file mode 100644 index 2b252a6924..0000000000 --- a/Source/Core/Common/Atomic.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#ifdef _WIN32 - -#include "Common/Atomic_Win32.h" // IWYU pragma: export - -#else - -// GCC-compatible compiler assumed! -#include "Common/Atomic_GCC.h" // IWYU pragma: export - -#endif diff --git a/Source/Core/Common/Atomic_GCC.h b/Source/Core/Common/Atomic_GCC.h deleted file mode 100644 index 1094c197b4..0000000000 --- a/Source/Core/Common/Atomic_GCC.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// IWYU pragma: private, include "Common/Atomic.h" - -#pragma once - -#include "Common/Common.h" -#include "Common/CommonTypes.h" - -// Atomic operations are performed in a single step by the CPU. It is -// impossible for other threads to see the operation "half-done." -// -// Some atomic operations can be combined with different types of memory -// barriers called "Acquire semantics" and "Release semantics", defined below. -// -// Acquire semantics: Future memory accesses cannot be relocated to before the -// operation. -// -// Release semantics: Past memory accesses cannot be relocated to after the -// operation. -// -// These barriers affect not only the compiler, but also the CPU. - -namespace Common -{ -inline void AtomicAdd(volatile u32& target, u32 value) -{ - __sync_add_and_fetch(&target, value); -} - -inline void AtomicAnd(volatile u32& target, u32 value) -{ - __sync_and_and_fetch(&target, value); -} - -inline void AtomicDecrement(volatile u32& target) -{ - __sync_add_and_fetch(&target, -1); -} - -inline void AtomicIncrement(volatile u32& target) -{ - __sync_add_and_fetch(&target, 1); -} - -inline void AtomicOr(volatile u32& target, u32 value) -{ - __sync_or_and_fetch(&target, value); -} - -#ifndef __ATOMIC_RELAXED -#error __ATOMIC_RELAXED not defined; your compiler version is too old. -#endif - -template -inline T AtomicLoad(volatile T& src) -{ - return __atomic_load_n(&src, __ATOMIC_RELAXED); -} - -template -inline T AtomicLoadAcquire(volatile T& src) -{ - return __atomic_load_n(&src, __ATOMIC_ACQUIRE); -} - -template -inline void AtomicStore(volatile T& dest, U value) -{ - __atomic_store_n(&dest, value, __ATOMIC_RELAXED); -} - -template -inline void AtomicStoreRelease(volatile T& dest, U value) -{ - __atomic_store_n(&dest, value, __ATOMIC_RELEASE); -} - -template -inline T* AtomicExchangeAcquire(T* volatile& loc, U newval) -{ - return __atomic_exchange_n(&loc, newval, __ATOMIC_ACQ_REL); -} -} // namespace Common diff --git a/Source/Core/Common/Atomic_Win32.h b/Source/Core/Common/Atomic_Win32.h deleted file mode 100644 index d0c5a95ebf..0000000000 --- a/Source/Core/Common/Atomic_Win32.h +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// IWYU pragma: private, include "Common/Atomic.h" - -#pragma once - -#include - -#include -#include "Common/CommonTypes.h" - -// Atomic operations are performed in a single step by the CPU. It is -// impossible for other threads to see the operation "half-done." -// -// Some atomic operations can be combined with different types of memory -// barriers called "Acquire semantics" and "Release semantics", defined below. -// -// Acquire semantics: Future memory accesses cannot be relocated to before the -// operation. -// -// Release semantics: Past memory accesses cannot be relocated to after the -// operation. -// -// These barriers affect not only the compiler, but also the CPU. -// -// NOTE: Acquire and Release are not differentiated right now. They perform a -// full memory barrier instead of a "one-way" memory barrier. The newest -// Windows SDK has Acquire and Release versions of some Interlocked* functions. - -namespace Common -{ -inline void AtomicAdd(volatile u32& target, u32 value) -{ - _InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value); -} - -inline void AtomicAnd(volatile u32& target, u32 value) -{ - _InterlockedAnd((volatile LONG*)&target, (LONG)value); -} - -inline void AtomicIncrement(volatile u32& target) -{ - _InterlockedIncrement((volatile LONG*)&target); -} - -inline void AtomicDecrement(volatile u32& target) -{ - _InterlockedDecrement((volatile LONG*)&target); -} - -inline void AtomicOr(volatile u32& target, u32 value) -{ - _InterlockedOr((volatile LONG*)&target, (LONG)value); -} - -template -inline T AtomicLoad(volatile T& src) -{ - return src; // 32-bit reads are always atomic. -} - -template -inline T AtomicLoadAcquire(volatile T& src) -{ - // 32-bit reads are always atomic. - T result = src; - // Compiler instruction only. x86 loads always have acquire semantics. - std::atomic_thread_fence(std::memory_order_acquire); - return result; -} - -template -inline void AtomicStore(volatile T& dest, U value) -{ - dest = (T)value; // 32-bit writes are always atomic. -} - -template -inline void AtomicStoreRelease(volatile T& dest, U value) -{ - // Compiler instruction only. x86 stores always have release semantics. - std::atomic_thread_fence(std::memory_order_release); - dest = (T)value; // 32-bit writes are always atomic. -} - -template -inline T* AtomicExchangeAcquire(T* volatile& loc, U newval) -{ - return (T*)_InterlockedExchangePointer_acq((void* volatile*)&loc, (void*)newval); -} -} // namespace Common diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index fc955c1487..1b806129fd 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -2,7 +2,6 @@ add_library(common Analytics.cpp Analytics.h Assert.h - Atomic.h BitField.h BitSet.h BitUtils.h diff --git a/Source/Core/Common/ChunkFile.h b/Source/Core/Common/ChunkFile.h index 3e556f5397..e657cd2217 100644 --- a/Source/Core/Common/ChunkFile.h +++ b/Source/Core/Common/ChunkFile.h @@ -221,10 +221,10 @@ public: template void Do(std::atomic& atomic) { - T temp = atomic.load(); + T temp = atomic.load(std::memory_order_relaxed); Do(temp); if (mode == MODE_READ) - atomic.store(temp); + atomic.store(temp, std::memory_order_relaxed); } template diff --git a/Source/Core/Core/HW/MMIO.h b/Source/Core/Core/HW/MMIO.h index bb76cefced..a91ca12df4 100644 --- a/Source/Core/Core/HW/MMIO.h +++ b/Source/Core/Core/HW/MMIO.h @@ -5,11 +5,13 @@ #pragma once #include +#include #include #include #include #include "Common/Assert.h" +#include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Core/ConfigManager.h" #include "Core/HW/MMIOHandlers.h" @@ -79,17 +81,19 @@ inline u16* LowPart(u32* ptr) { return (u16*)ptr; } -inline u16* LowPart(volatile u32* ptr) +inline u16* LowPart(std::atomic* ptr) { - return (u16*)ptr; + static_assert(std::atomic::is_always_lock_free && sizeof(std::atomic) == sizeof(u32)); + return LowPart(Common::BitCast(ptr)); } inline u16* HighPart(u32* ptr) { return LowPart(ptr) + 1; } -inline u16* HighPart(volatile u32* ptr) +inline u16* HighPart(std::atomic* ptr) { - return LowPart(ptr) + 1; + static_assert(std::atomic::is_always_lock_free && sizeof(std::atomic) == sizeof(u32)); + return HighPart(Common::BitCast(ptr)); } } // namespace Utils diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index e3ea958da5..32e75ded15 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -49,7 +49,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule: #include #include -#include "Common/Atomic.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Common/Thread.h" diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index e8af8bc174..dcebeaca39 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -16,9 +16,6 @@ - - - diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 3bcf38c2ae..5be25d3436 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -11,7 +11,6 @@ #include #include -#include "Common/Atomic.h" #include "Common/CommonTypes.h" #include "Common/GL/GLContext.h" #include "Common/GL/GLUtil.h" diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 0ad2bf2cea..14b865386e 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -6,7 +6,6 @@ #include #include "Common/Assert.h" -#include "Common/Atomic.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/Flag.h" @@ -91,21 +90,15 @@ void DoState(PointerWrap& p) p.Do(s_interrupt_waiting); } -static inline void WriteLow(volatile u32& _reg, u16 lowbits) +static inline void WriteLow(std::atomic& reg, u16 lowbits) { - Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits); + reg.store((reg.load(std::memory_order_relaxed) & 0xFFFF0000) | lowbits, + std::memory_order_relaxed); } -static inline void WriteHigh(volatile u32& _reg, u16 highbits) +static inline void WriteHigh(std::atomic& reg, u16 highbits) { - Common::AtomicStore(_reg, (_reg & 0x0000FFFF) | ((u32)highbits << 16)); -} -static inline u16 ReadLow(u32 _reg) -{ - return (u16)(_reg & 0xFFFF); -} -static inline u16 ReadHigh(u32 _reg) -{ - return (u16)(_reg >> 16); + reg.store((reg.load(std::memory_order_relaxed) & 0x0000FFFF) | (static_cast(highbits) << 16), + std::memory_order_relaxed); } void Init() @@ -259,31 +252,50 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) mmio->Register(base | PERF_SELECT, MMIO::InvalidRead(), MMIO::Nop()); // Some MMIOs have different handlers for single core vs. dual core mode. - mmio->Register(base | FIFO_RW_DISTANCE_LO, + mmio->Register( + base | FIFO_RW_DISTANCE_LO, + IsOnThread() ? MMIO::ComplexRead([](u32) { + if (fifo.CPWritePointer.load(std::memory_order_relaxed) >= + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) + { + return static_cast(fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)); + } + else + { + return static_cast(fifo.CPEnd.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed) + + fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.CPBase.load(std::memory_order_relaxed) + 32); + } + }) : + MMIO::DirectRead(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)), + MMIO::DirectWrite(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance), + WMASK_LO_ALIGN_32BIT)); + mmio->Register(base | FIFO_RW_DISTANCE_HI, IsOnThread() ? MMIO::ComplexRead([](u32) { - if (fifo.CPWritePointer >= fifo.SafeCPReadPointer) - return ReadLow(fifo.CPWritePointer - fifo.SafeCPReadPointer); + Fifo::SyncGPUForRegisterAccess(); + if (fifo.CPWritePointer.load(std::memory_order_relaxed) >= + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) + { + return (fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) >> + 16; + } else - return ReadLow(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - - fifo.CPBase + 32); + { + return (fifo.CPEnd.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed) + + fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.CPBase.load(std::memory_order_relaxed) + 32) >> + 16; + } }) : - MMIO::DirectRead(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)), - MMIO::DirectWrite(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance), - WMASK_LO_ALIGN_32BIT)); - mmio->Register(base | FIFO_RW_DISTANCE_HI, - IsOnThread() ? MMIO::ComplexRead([](u32) { - Fifo::SyncGPUForRegisterAccess(); - if (fifo.CPWritePointer >= fifo.SafeCPReadPointer) - return ReadHigh(fifo.CPWritePointer - fifo.SafeCPReadPointer); - else - return ReadHigh(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - - fifo.CPBase + 32); - }) : - MMIO::ComplexRead([](u32) { - Fifo::SyncGPUForRegisterAccess(); - return ReadHigh(fifo.CPReadWriteDistance); - }), + MMIO::ComplexRead([](u32) { + Fifo::SyncGPUForRegisterAccess(); + return fifo.CPReadWriteDistance.load(std::memory_order_relaxed) >> 16; + }), MMIO::ComplexWrite([WMASK_HI_RESTRICT](u32, u16 val) { Fifo::SyncGPUForRegisterAccess(); WriteHigh(fifo.CPReadWriteDistance, val & WMASK_HI_RESTRICT); @@ -297,16 +309,17 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) mmio->Register(base | FIFO_READ_POINTER_HI, IsOnThread() ? MMIO::ComplexRead([](u32) { Fifo::SyncGPUForRegisterAccess(); - return ReadHigh(fifo.SafeCPReadPointer); + return fifo.SafeCPReadPointer.load(std::memory_order_relaxed) >> 16; }) : MMIO::ComplexRead([](u32) { Fifo::SyncGPUForRegisterAccess(); - return ReadHigh(fifo.CPReadPointer); + return fifo.CPReadPointer.load(std::memory_order_relaxed) >> 16; }), IsOnThread() ? MMIO::ComplexWrite([WMASK_HI_RESTRICT](u32, u16 val) { Fifo::SyncGPUForRegisterAccess(); WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT); - fifo.SafeCPReadPointer = fifo.CPReadPointer; + fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed), + std::memory_order_relaxed); }) : MMIO::ComplexWrite([WMASK_HI_RESTRICT](u32, u16 val) { Fifo::SyncGPUForRegisterAccess(); @@ -325,8 +338,9 @@ void GatherPipeBursted() { // In multibuffer mode is not allowed write in the same FIFO attached to the GPU. // Fix Pokemon XD in DC mode. - if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd) && - (ProcessorInterface::Fifo_CPUBase == fifo.CPBase) && fifo.CPReadWriteDistance > 0) + if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd.load(std::memory_order_relaxed)) && + (ProcessorInterface::Fifo_CPUBase == fifo.CPBase.load(std::memory_order_relaxed)) && + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > 0) { Fifo::FlushGpu(); } @@ -336,35 +350,47 @@ void GatherPipeBursted() } // update the fifo pointer - if (fifo.CPWritePointer == fifo.CPEnd) - fifo.CPWritePointer = fifo.CPBase; + if (fifo.CPWritePointer.load(std::memory_order_relaxed) == + fifo.CPEnd.load(std::memory_order_relaxed)) + { + fifo.CPWritePointer.store(fifo.CPBase, std::memory_order_relaxed); + } else - fifo.CPWritePointer += GATHER_PIPE_SIZE; + { + fifo.CPWritePointer.fetch_add(GATHER_PIPE_SIZE, std::memory_order_relaxed); + } if (m_CPCtrlReg.GPReadEnable && m_CPCtrlReg.GPLinkEnable) { - ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer; - ProcessorInterface::Fifo_CPUBase = fifo.CPBase; - ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd; + ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer.load(std::memory_order_relaxed); + ProcessorInterface::Fifo_CPUBase = fifo.CPBase.load(std::memory_order_relaxed); + ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd.load(std::memory_order_relaxed); } // If the game is running close to overflowing, make the exception checking more frequent. if (fifo.bFF_HiWatermark) CoreTiming::ForceExceptionCheck(0); - Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE); + fifo.CPReadWriteDistance.fetch_add(GATHER_PIPE_SIZE, std::memory_order_seq_cst); Fifo::RunGpu(); - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) <= + fifo.CPEnd.load(std::memory_order_relaxed) - + fifo.CPBase.load(std::memory_order_relaxed), "FIFO is overflowed by GatherPipe !\nCPU thread is too fast!"); // check if we are in sync - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPWritePointer == ProcessorInterface::Fifo_CPUWritePointer, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPWritePointer.load(std::memory_order_relaxed) == + ProcessorInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync"); - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPBase == ProcessorInterface::Fifo_CPUBase, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPBase.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUBase, "FIFOs linked but out of sync"); - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPEnd == ProcessorInterface::Fifo_CPUEnd, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPEnd.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUEnd, "FIFOs linked but out of sync"); } @@ -403,31 +429,41 @@ void SetCPStatusFromGPU() // breakpoint if (fifo.bFF_BPEnable) { - if (fifo.CPBreakpoint == fifo.CPReadPointer) + if (fifo.CPBreakpoint.load(std::memory_order_relaxed) == + fifo.CPReadPointer.load(std::memory_order_relaxed)) { if (!fifo.bFF_Breakpoint) { - DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}", fifo.CPReadPointer); + DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}", + fifo.CPReadPointer.load(std::memory_order_relaxed)); fifo.bFF_Breakpoint = true; } } else { if (fifo.bFF_Breakpoint) - DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer); + { + DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", + fifo.CPReadPointer.load(std::memory_order_relaxed)); + } fifo.bFF_Breakpoint = false; } } else { if (fifo.bFF_Breakpoint) - DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer); + { + DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", + fifo.CPReadPointer.load(std::memory_order_relaxed)); + } fifo.bFF_Breakpoint = false; } // overflow & underflow check - fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark); - fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark); + fifo.bFF_HiWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark); + fifo.bFF_LoWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark); bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt; bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt; @@ -457,8 +493,10 @@ void SetCPStatusFromGPU() void SetCPStatusFromCPU() { // overflow & underflow check - fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark); - fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark); + fifo.bFF_HiWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark); + fifo.bFF_LoWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark); bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt; bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt; @@ -489,9 +527,11 @@ void SetCpStatusRegister() { // Here always there is one fifo attached to the GPU m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint; - m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance || (fifo.CPReadPointer == fifo.CPWritePointer); - m_CPStatusReg.CommandIdle = - !fifo.CPReadWriteDistance || Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable; + m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) || + (fifo.CPReadPointer.load(std::memory_order_relaxed) == + fifo.CPWritePointer.load(std::memory_order_relaxed)); + m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) || + Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable; m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark; m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark; @@ -548,29 +588,32 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess) cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false"); { - PanicAlertFmt("Illegal command {:02x}\n" - "CPBase: {:#010x}\n" - "CPEnd: {:#010x}\n" - "CPHiWatermark: {:#010x}\n" - "CPLoWatermark: {:#010x}\n" - "CPReadWriteDistance: {:#010x}\n" - "CPWritePointer: {:#010x}\n" - "CPReadPointer: {:#010x}\n" - "CPBreakpoint: {:#010x}\n" - "bFF_GPReadEnable: {}\n" - "bFF_BPEnable: {}\n" - "bFF_BPInt: {}\n" - "bFF_Breakpoint: {}\n" - "bFF_GPLinkEnable: {}\n" - "bFF_HiWatermarkInt: {}\n" - "bFF_LoWatermarkInt: {}\n", - cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, - fifo.CPReadWriteDistance, fifo.CPWritePointer, fifo.CPReadPointer, - fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false", - fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false", - fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false", - fifo.bFF_HiWatermarkInt ? "true" : "false", - fifo.bFF_LoWatermarkInt ? "true" : "false"); + PanicAlertFmt( + "Illegal command {:02x}\n" + "CPBase: {:#010x}\n" + "CPEnd: {:#010x}\n" + "CPHiWatermark: {:#010x}\n" + "CPLoWatermark: {:#010x}\n" + "CPReadWriteDistance: {:#010x}\n" + "CPWritePointer: {:#010x}\n" + "CPReadPointer: {:#010x}\n" + "CPBreakpoint: {:#010x}\n" + "bFF_GPReadEnable: {}\n" + "bFF_BPEnable: {}\n" + "bFF_BPInt: {}\n" + "bFF_Breakpoint: {}\n" + "bFF_GPLinkEnable: {}\n" + "bFF_HiWatermarkInt: {}\n" + "bFF_LoWatermarkInt: {}\n", + cmd_byte, fifo.CPBase.load(std::memory_order_relaxed), + fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark, fifo.CPLoWatermark, + fifo.CPReadWriteDistance.load(std::memory_order_relaxed), + fifo.CPWritePointer.load(std::memory_order_relaxed), + fifo.CPReadPointer.load(std::memory_order_relaxed), + fifo.CPBreakpoint.load(std::memory_order_relaxed), fifo.bFF_GPReadEnable ? "true" : "false", + fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false", + fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false", + fifo.bFF_HiWatermarkInt ? "true" : "false", fifo.bFF_LoWatermarkInt ? "true" : "false"); } } diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index c246d5651d..bc0a8ede49 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "Common/CommonTypes.h" class PointerWrap; @@ -17,15 +19,15 @@ namespace CommandProcessor struct SCPFifoStruct { // fifo registers - volatile u32 CPBase; - volatile u32 CPEnd; + std::atomic CPBase; + std::atomic CPEnd; u32 CPHiWatermark; u32 CPLoWatermark; - volatile u32 CPReadWriteDistance; - volatile u32 CPWritePointer; - volatile u32 CPReadPointer; - volatile u32 CPBreakpoint; - volatile u32 SafeCPReadPointer; + std::atomic CPReadWriteDistance; + std::atomic CPWritePointer; + std::atomic CPReadPointer; + std::atomic CPBreakpoint; + std::atomic SafeCPReadPointer; volatile u32 bFF_GPLinkEnable; volatile u32 bFF_GPReadEnable; diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 5aede12b96..f12776449c 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -8,7 +8,6 @@ #include #include "Common/Assert.h" -#include "Common/Atomic.h" #include "Common/BlockingLoop.h" #include "Common/ChunkFile.h" #include "Common/Event.h" @@ -329,33 +328,37 @@ void RunGpuLoop() // check if we are able to run this buffer while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && - fifo.CPReadWriteDistance && !AtBreakpoint()) + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint()) { if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance) break; u32 cyclesExecuted = 0; - u32 readPtr = fifo.CPReadPointer; + u32 readPtr = fifo.CPReadPointer.load(std::memory_order_relaxed); ReadDataFromFifo(readPtr); - if (readPtr == fifo.CPEnd) - readPtr = fifo.CPBase; + if (readPtr == fifo.CPEnd.load(std::memory_order_relaxed)) + readPtr = fifo.CPBase.load(std::memory_order_relaxed); else readPtr += 32; - ASSERT_MSG(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0, + ASSERT_MSG(COMMANDPROCESSOR, + (s32)fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32 >= 0, "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce " "instability in the game. Please report it.", - fifo.CPReadWriteDistance - 32); + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32); u8* write_ptr = s_video_buffer_write_ptr; s_video_buffer_read_ptr = OpcodeDecoder::Run( DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); - Common::AtomicStore(fifo.CPReadPointer, readPtr); - Common::AtomicAdd(fifo.CPReadWriteDistance, static_cast(-32)); + fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed); + fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst); if ((write_ptr - s_video_buffer_read_ptr) == 0) - Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); + { + fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed), + std::memory_order_relaxed); + } CommandProcessor::SetCPStatusFromGPU(); @@ -412,7 +415,8 @@ void GpuMaySleep() bool AtBreakpoint() { CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; - return fifo.bFF_BPEnable && (fifo.CPReadPointer == fifo.CPBreakpoint); + return fifo.bFF_BPEnable && (fifo.CPReadPointer.load(std::memory_order_relaxed) == + fifo.CPBreakpoint.load(std::memory_order_relaxed)); } void RunGpu() @@ -442,12 +446,12 @@ static int RunGpuOnCpu(int ticks) CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; bool reset_simd_state = false; int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load(); - while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && - available_ticks >= 0) + while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && + !AtBreakpoint() && available_ticks >= 0) { if (s_use_deterministic_gpu_thread) { - ReadDataFromFifoOnCPU(fifo.CPReadPointer); + ReadDataFromFifoOnCPU(fifo.CPReadPointer.load(std::memory_order_relaxed)); s_gpu_mainloop.Wakeup(); } else @@ -458,19 +462,25 @@ static int RunGpuOnCpu(int ticks) FPURoundMode::LoadDefaultSIMDState(); reset_simd_state = true; } - ReadDataFromFifo(fifo.CPReadPointer); + ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed)); u32 cycles = 0; s_video_buffer_read_ptr = OpcodeDecoder::Run( DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); available_ticks -= cycles; } - if (fifo.CPReadPointer == fifo.CPEnd) - fifo.CPReadPointer = fifo.CPBase; + if (fifo.CPReadPointer.load(std::memory_order_relaxed) == + fifo.CPEnd.load(std::memory_order_relaxed)) + { + fifo.CPReadPointer.store(fifo.CPBase.load(std::memory_order_relaxed), + std::memory_order_relaxed); + } else - fifo.CPReadPointer += 32; + { + fifo.CPReadPointer.fetch_add(32, std::memory_order_relaxed); + } - fifo.CPReadWriteDistance -= 32; + fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_relaxed); } CommandProcessor::SetCPStatusFromGPU(); diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 9eac785c5a..6162004127 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -889,7 +889,9 @@ void Renderer::CheckFifoRecording() RecordVideoMemory(); } - FifoRecorder::GetInstance().EndFrame(CommandProcessor::fifo.CPBase, CommandProcessor::fifo.CPEnd); + FifoRecorder::GetInstance().EndFrame( + CommandProcessor::fifo.CPBase.load(std::memory_order_relaxed), + CommandProcessor::fifo.CPEnd.load(std::memory_order_relaxed)); } void Renderer::RecordVideoMemory()