diff --git a/Source/Core/Common/Atomic.h b/Source/Core/Common/Atomic.h deleted file mode 100644 index 2b252a6924..0000000000 --- a/Source/Core/Common/Atomic.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#ifdef _WIN32 - -#include "Common/Atomic_Win32.h" // IWYU pragma: export - -#else - -// GCC-compatible compiler assumed! -#include "Common/Atomic_GCC.h" // IWYU pragma: export - -#endif diff --git a/Source/Core/Common/Atomic_GCC.h b/Source/Core/Common/Atomic_GCC.h deleted file mode 100644 index 1094c197b4..0000000000 --- a/Source/Core/Common/Atomic_GCC.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// IWYU pragma: private, include "Common/Atomic.h" - -#pragma once - -#include "Common/Common.h" -#include "Common/CommonTypes.h" - -// Atomic operations are performed in a single step by the CPU. It is -// impossible for other threads to see the operation "half-done." -// -// Some atomic operations can be combined with different types of memory -// barriers called "Acquire semantics" and "Release semantics", defined below. -// -// Acquire semantics: Future memory accesses cannot be relocated to before the -// operation. -// -// Release semantics: Past memory accesses cannot be relocated to after the -// operation. -// -// These barriers affect not only the compiler, but also the CPU. - -namespace Common -{ -inline void AtomicAdd(volatile u32& target, u32 value) -{ - __sync_add_and_fetch(&target, value); -} - -inline void AtomicAnd(volatile u32& target, u32 value) -{ - __sync_and_and_fetch(&target, value); -} - -inline void AtomicDecrement(volatile u32& target) -{ - __sync_add_and_fetch(&target, -1); -} - -inline void AtomicIncrement(volatile u32& target) -{ - __sync_add_and_fetch(&target, 1); -} - -inline void AtomicOr(volatile u32& target, u32 value) -{ - __sync_or_and_fetch(&target, value); -} - -#ifndef __ATOMIC_RELAXED -#error __ATOMIC_RELAXED not defined; your compiler version is too old. -#endif - -template -inline T AtomicLoad(volatile T& src) -{ - return __atomic_load_n(&src, __ATOMIC_RELAXED); -} - -template -inline T AtomicLoadAcquire(volatile T& src) -{ - return __atomic_load_n(&src, __ATOMIC_ACQUIRE); -} - -template -inline void AtomicStore(volatile T& dest, U value) -{ - __atomic_store_n(&dest, value, __ATOMIC_RELAXED); -} - -template -inline void AtomicStoreRelease(volatile T& dest, U value) -{ - __atomic_store_n(&dest, value, __ATOMIC_RELEASE); -} - -template -inline T* AtomicExchangeAcquire(T* volatile& loc, U newval) -{ - return __atomic_exchange_n(&loc, newval, __ATOMIC_ACQ_REL); -} -} // namespace Common diff --git a/Source/Core/Common/Atomic_Win32.h b/Source/Core/Common/Atomic_Win32.h deleted file mode 100644 index d0c5a95ebf..0000000000 --- a/Source/Core/Common/Atomic_Win32.h +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// IWYU pragma: private, include "Common/Atomic.h" - -#pragma once - -#include - -#include -#include "Common/CommonTypes.h" - -// Atomic operations are performed in a single step by the CPU. It is -// impossible for other threads to see the operation "half-done." -// -// Some atomic operations can be combined with different types of memory -// barriers called "Acquire semantics" and "Release semantics", defined below. -// -// Acquire semantics: Future memory accesses cannot be relocated to before the -// operation. -// -// Release semantics: Past memory accesses cannot be relocated to after the -// operation. -// -// These barriers affect not only the compiler, but also the CPU. -// -// NOTE: Acquire and Release are not differentiated right now. They perform a -// full memory barrier instead of a "one-way" memory barrier. The newest -// Windows SDK has Acquire and Release versions of some Interlocked* functions. - -namespace Common -{ -inline void AtomicAdd(volatile u32& target, u32 value) -{ - _InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value); -} - -inline void AtomicAnd(volatile u32& target, u32 value) -{ - _InterlockedAnd((volatile LONG*)&target, (LONG)value); -} - -inline void AtomicIncrement(volatile u32& target) -{ - _InterlockedIncrement((volatile LONG*)&target); -} - -inline void AtomicDecrement(volatile u32& target) -{ - _InterlockedDecrement((volatile LONG*)&target); -} - -inline void AtomicOr(volatile u32& target, u32 value) -{ - _InterlockedOr((volatile LONG*)&target, (LONG)value); -} - -template -inline T AtomicLoad(volatile T& src) -{ - return src; // 32-bit reads are always atomic. -} - -template -inline T AtomicLoadAcquire(volatile T& src) -{ - // 32-bit reads are always atomic. - T result = src; - // Compiler instruction only. x86 loads always have acquire semantics. - std::atomic_thread_fence(std::memory_order_acquire); - return result; -} - -template -inline void AtomicStore(volatile T& dest, U value) -{ - dest = (T)value; // 32-bit writes are always atomic. -} - -template -inline void AtomicStoreRelease(volatile T& dest, U value) -{ - // Compiler instruction only. x86 stores always have release semantics. - std::atomic_thread_fence(std::memory_order_release); - dest = (T)value; // 32-bit writes are always atomic. -} - -template -inline T* AtomicExchangeAcquire(T* volatile& loc, U newval) -{ - return (T*)_InterlockedExchangePointer_acq((void* volatile*)&loc, (void*)newval); -} -} // namespace Common diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index fc955c1487..1b806129fd 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -2,7 +2,6 @@ add_library(common Analytics.cpp Analytics.h Assert.h - Atomic.h BitField.h BitSet.h BitUtils.h diff --git a/Source/Core/Common/ChunkFile.h b/Source/Core/Common/ChunkFile.h index 3e556f5397..e657cd2217 100644 --- a/Source/Core/Common/ChunkFile.h +++ b/Source/Core/Common/ChunkFile.h @@ -221,10 +221,10 @@ public: template void Do(std::atomic& atomic) { - T temp = atomic.load(); + T temp = atomic.load(std::memory_order_relaxed); Do(temp); if (mode == MODE_READ) - atomic.store(temp); + atomic.store(temp, std::memory_order_relaxed); } template diff --git a/Source/Core/Core/HW/MMIO.h b/Source/Core/Core/HW/MMIO.h index bb76cefced..a91ca12df4 100644 --- a/Source/Core/Core/HW/MMIO.h +++ b/Source/Core/Core/HW/MMIO.h @@ -5,11 +5,13 @@ #pragma once #include +#include #include #include #include #include "Common/Assert.h" +#include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Core/ConfigManager.h" #include "Core/HW/MMIOHandlers.h" @@ -79,17 +81,19 @@ inline u16* LowPart(u32* ptr) { return (u16*)ptr; } -inline u16* LowPart(volatile u32* ptr) +inline u16* LowPart(std::atomic* ptr) { - return (u16*)ptr; + static_assert(std::atomic::is_always_lock_free && sizeof(std::atomic) == sizeof(u32)); + return LowPart(Common::BitCast(ptr)); } inline u16* HighPart(u32* ptr) { return LowPart(ptr) + 1; } -inline u16* HighPart(volatile u32* ptr) +inline u16* HighPart(std::atomic* ptr) { - return LowPart(ptr) + 1; + static_assert(std::atomic::is_always_lock_free && sizeof(std::atomic) == sizeof(u32)); + return HighPart(Common::BitCast(ptr)); } } // namespace Utils diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index e3ea958da5..32e75ded15 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -49,7 +49,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule: #include #include -#include "Common/Atomic.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Common/Thread.h" diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 13c155801f..ecfcb078e5 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -16,9 +16,6 @@ - - - diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 3bcf38c2ae..5be25d3436 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -11,7 +11,6 @@ #include #include -#include "Common/Atomic.h" #include "Common/CommonTypes.h" #include "Common/GL/GLContext.h" #include "Common/GL/GLUtil.h" diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 0ad2bf2cea..14b865386e 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -6,7 +6,6 @@ #include #include "Common/Assert.h" -#include "Common/Atomic.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/Flag.h" @@ -91,21 +90,15 @@ void DoState(PointerWrap& p) p.Do(s_interrupt_waiting); } -static inline void WriteLow(volatile u32& _reg, u16 lowbits) +static inline void WriteLow(std::atomic& reg, u16 lowbits) { - Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits); + reg.store((reg.load(std::memory_order_relaxed) & 0xFFFF0000) | lowbits, + std::memory_order_relaxed); } -static inline void WriteHigh(volatile u32& _reg, u16 highbits) +static inline void WriteHigh(std::atomic& reg, u16 highbits) { - Common::AtomicStore(_reg, (_reg & 0x0000FFFF) | ((u32)highbits << 16)); -} -static inline u16 ReadLow(u32 _reg) -{ - return (u16)(_reg & 0xFFFF); -} -static inline u16 ReadHigh(u32 _reg) -{ - return (u16)(_reg >> 16); + reg.store((reg.load(std::memory_order_relaxed) & 0x0000FFFF) | (static_cast(highbits) << 16), + std::memory_order_relaxed); } void Init() @@ -259,31 +252,50 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) mmio->Register(base | PERF_SELECT, MMIO::InvalidRead(), MMIO::Nop()); // Some MMIOs have different handlers for single core vs. dual core mode. - mmio->Register(base | FIFO_RW_DISTANCE_LO, + mmio->Register( + base | FIFO_RW_DISTANCE_LO, + IsOnThread() ? MMIO::ComplexRead([](u32) { + if (fifo.CPWritePointer.load(std::memory_order_relaxed) >= + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) + { + return static_cast(fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)); + } + else + { + return static_cast(fifo.CPEnd.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed) + + fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.CPBase.load(std::memory_order_relaxed) + 32); + } + }) : + MMIO::DirectRead(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)), + MMIO::DirectWrite(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance), + WMASK_LO_ALIGN_32BIT)); + mmio->Register(base | FIFO_RW_DISTANCE_HI, IsOnThread() ? MMIO::ComplexRead([](u32) { - if (fifo.CPWritePointer >= fifo.SafeCPReadPointer) - return ReadLow(fifo.CPWritePointer - fifo.SafeCPReadPointer); + Fifo::SyncGPUForRegisterAccess(); + if (fifo.CPWritePointer.load(std::memory_order_relaxed) >= + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) + { + return (fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) >> + 16; + } else - return ReadLow(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - - fifo.CPBase + 32); + { + return (fifo.CPEnd.load(std::memory_order_relaxed) - + fifo.SafeCPReadPointer.load(std::memory_order_relaxed) + + fifo.CPWritePointer.load(std::memory_order_relaxed) - + fifo.CPBase.load(std::memory_order_relaxed) + 32) >> + 16; + } }) : - MMIO::DirectRead(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance)), - MMIO::DirectWrite(MMIO::Utils::LowPart(&fifo.CPReadWriteDistance), - WMASK_LO_ALIGN_32BIT)); - mmio->Register(base | FIFO_RW_DISTANCE_HI, - IsOnThread() ? MMIO::ComplexRead([](u32) { - Fifo::SyncGPUForRegisterAccess(); - if (fifo.CPWritePointer >= fifo.SafeCPReadPointer) - return ReadHigh(fifo.CPWritePointer - fifo.SafeCPReadPointer); - else - return ReadHigh(fifo.CPEnd - fifo.SafeCPReadPointer + fifo.CPWritePointer - - fifo.CPBase + 32); - }) : - MMIO::ComplexRead([](u32) { - Fifo::SyncGPUForRegisterAccess(); - return ReadHigh(fifo.CPReadWriteDistance); - }), + MMIO::ComplexRead([](u32) { + Fifo::SyncGPUForRegisterAccess(); + return fifo.CPReadWriteDistance.load(std::memory_order_relaxed) >> 16; + }), MMIO::ComplexWrite([WMASK_HI_RESTRICT](u32, u16 val) { Fifo::SyncGPUForRegisterAccess(); WriteHigh(fifo.CPReadWriteDistance, val & WMASK_HI_RESTRICT); @@ -297,16 +309,17 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) mmio->Register(base | FIFO_READ_POINTER_HI, IsOnThread() ? MMIO::ComplexRead([](u32) { Fifo::SyncGPUForRegisterAccess(); - return ReadHigh(fifo.SafeCPReadPointer); + return fifo.SafeCPReadPointer.load(std::memory_order_relaxed) >> 16; }) : MMIO::ComplexRead([](u32) { Fifo::SyncGPUForRegisterAccess(); - return ReadHigh(fifo.CPReadPointer); + return fifo.CPReadPointer.load(std::memory_order_relaxed) >> 16; }), IsOnThread() ? MMIO::ComplexWrite([WMASK_HI_RESTRICT](u32, u16 val) { Fifo::SyncGPUForRegisterAccess(); WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT); - fifo.SafeCPReadPointer = fifo.CPReadPointer; + fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed), + std::memory_order_relaxed); }) : MMIO::ComplexWrite([WMASK_HI_RESTRICT](u32, u16 val) { Fifo::SyncGPUForRegisterAccess(); @@ -325,8 +338,9 @@ void GatherPipeBursted() { // In multibuffer mode is not allowed write in the same FIFO attached to the GPU. // Fix Pokemon XD in DC mode. - if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd) && - (ProcessorInterface::Fifo_CPUBase == fifo.CPBase) && fifo.CPReadWriteDistance > 0) + if ((ProcessorInterface::Fifo_CPUEnd == fifo.CPEnd.load(std::memory_order_relaxed)) && + (ProcessorInterface::Fifo_CPUBase == fifo.CPBase.load(std::memory_order_relaxed)) && + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > 0) { Fifo::FlushGpu(); } @@ -336,35 +350,47 @@ void GatherPipeBursted() } // update the fifo pointer - if (fifo.CPWritePointer == fifo.CPEnd) - fifo.CPWritePointer = fifo.CPBase; + if (fifo.CPWritePointer.load(std::memory_order_relaxed) == + fifo.CPEnd.load(std::memory_order_relaxed)) + { + fifo.CPWritePointer.store(fifo.CPBase, std::memory_order_relaxed); + } else - fifo.CPWritePointer += GATHER_PIPE_SIZE; + { + fifo.CPWritePointer.fetch_add(GATHER_PIPE_SIZE, std::memory_order_relaxed); + } if (m_CPCtrlReg.GPReadEnable && m_CPCtrlReg.GPLinkEnable) { - ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer; - ProcessorInterface::Fifo_CPUBase = fifo.CPBase; - ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd; + ProcessorInterface::Fifo_CPUWritePointer = fifo.CPWritePointer.load(std::memory_order_relaxed); + ProcessorInterface::Fifo_CPUBase = fifo.CPBase.load(std::memory_order_relaxed); + ProcessorInterface::Fifo_CPUEnd = fifo.CPEnd.load(std::memory_order_relaxed); } // If the game is running close to overflowing, make the exception checking more frequent. if (fifo.bFF_HiWatermark) CoreTiming::ForceExceptionCheck(0); - Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE); + fifo.CPReadWriteDistance.fetch_add(GATHER_PIPE_SIZE, std::memory_order_seq_cst); Fifo::RunGpu(); - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) <= + fifo.CPEnd.load(std::memory_order_relaxed) - + fifo.CPBase.load(std::memory_order_relaxed), "FIFO is overflowed by GatherPipe !\nCPU thread is too fast!"); // check if we are in sync - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPWritePointer == ProcessorInterface::Fifo_CPUWritePointer, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPWritePointer.load(std::memory_order_relaxed) == + ProcessorInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync"); - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPBase == ProcessorInterface::Fifo_CPUBase, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPBase.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUBase, "FIFOs linked but out of sync"); - ASSERT_MSG(COMMANDPROCESSOR, fifo.CPEnd == ProcessorInterface::Fifo_CPUEnd, + ASSERT_MSG(COMMANDPROCESSOR, + fifo.CPEnd.load(std::memory_order_relaxed) == ProcessorInterface::Fifo_CPUEnd, "FIFOs linked but out of sync"); } @@ -403,31 +429,41 @@ void SetCPStatusFromGPU() // breakpoint if (fifo.bFF_BPEnable) { - if (fifo.CPBreakpoint == fifo.CPReadPointer) + if (fifo.CPBreakpoint.load(std::memory_order_relaxed) == + fifo.CPReadPointer.load(std::memory_order_relaxed)) { if (!fifo.bFF_Breakpoint) { - DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}", fifo.CPReadPointer); + DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}", + fifo.CPReadPointer.load(std::memory_order_relaxed)); fifo.bFF_Breakpoint = true; } } else { if (fifo.bFF_Breakpoint) - DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer); + { + DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", + fifo.CPReadPointer.load(std::memory_order_relaxed)); + } fifo.bFF_Breakpoint = false; } } else { if (fifo.bFF_Breakpoint) - DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", fifo.CPReadPointer); + { + DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}", + fifo.CPReadPointer.load(std::memory_order_relaxed)); + } fifo.bFF_Breakpoint = false; } // overflow & underflow check - fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark); - fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark); + fifo.bFF_HiWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark); + fifo.bFF_LoWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark); bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt; bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt; @@ -457,8 +493,10 @@ void SetCPStatusFromGPU() void SetCPStatusFromCPU() { // overflow & underflow check - fifo.bFF_HiWatermark = (fifo.CPReadWriteDistance > fifo.CPHiWatermark); - fifo.bFF_LoWatermark = (fifo.CPReadWriteDistance < fifo.CPLoWatermark); + fifo.bFF_HiWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark); + fifo.bFF_LoWatermark = + (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark); bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt; bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt; @@ -489,9 +527,11 @@ void SetCpStatusRegister() { // Here always there is one fifo attached to the GPU m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint; - m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance || (fifo.CPReadPointer == fifo.CPWritePointer); - m_CPStatusReg.CommandIdle = - !fifo.CPReadWriteDistance || Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable; + m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) || + (fifo.CPReadPointer.load(std::memory_order_relaxed) == + fifo.CPWritePointer.load(std::memory_order_relaxed)); + m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) || + Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable; m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark; m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark; @@ -548,29 +588,32 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess) cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false"); { - PanicAlertFmt("Illegal command {:02x}\n" - "CPBase: {:#010x}\n" - "CPEnd: {:#010x}\n" - "CPHiWatermark: {:#010x}\n" - "CPLoWatermark: {:#010x}\n" - "CPReadWriteDistance: {:#010x}\n" - "CPWritePointer: {:#010x}\n" - "CPReadPointer: {:#010x}\n" - "CPBreakpoint: {:#010x}\n" - "bFF_GPReadEnable: {}\n" - "bFF_BPEnable: {}\n" - "bFF_BPInt: {}\n" - "bFF_Breakpoint: {}\n" - "bFF_GPLinkEnable: {}\n" - "bFF_HiWatermarkInt: {}\n" - "bFF_LoWatermarkInt: {}\n", - cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, - fifo.CPReadWriteDistance, fifo.CPWritePointer, fifo.CPReadPointer, - fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false", - fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false", - fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false", - fifo.bFF_HiWatermarkInt ? "true" : "false", - fifo.bFF_LoWatermarkInt ? "true" : "false"); + PanicAlertFmt( + "Illegal command {:02x}\n" + "CPBase: {:#010x}\n" + "CPEnd: {:#010x}\n" + "CPHiWatermark: {:#010x}\n" + "CPLoWatermark: {:#010x}\n" + "CPReadWriteDistance: {:#010x}\n" + "CPWritePointer: {:#010x}\n" + "CPReadPointer: {:#010x}\n" + "CPBreakpoint: {:#010x}\n" + "bFF_GPReadEnable: {}\n" + "bFF_BPEnable: {}\n" + "bFF_BPInt: {}\n" + "bFF_Breakpoint: {}\n" + "bFF_GPLinkEnable: {}\n" + "bFF_HiWatermarkInt: {}\n" + "bFF_LoWatermarkInt: {}\n", + cmd_byte, fifo.CPBase.load(std::memory_order_relaxed), + fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark, fifo.CPLoWatermark, + fifo.CPReadWriteDistance.load(std::memory_order_relaxed), + fifo.CPWritePointer.load(std::memory_order_relaxed), + fifo.CPReadPointer.load(std::memory_order_relaxed), + fifo.CPBreakpoint.load(std::memory_order_relaxed), fifo.bFF_GPReadEnable ? "true" : "false", + fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false", + fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false", + fifo.bFF_HiWatermarkInt ? "true" : "false", fifo.bFF_LoWatermarkInt ? "true" : "false"); } } diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index c246d5651d..bc0a8ede49 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "Common/CommonTypes.h" class PointerWrap; @@ -17,15 +19,15 @@ namespace CommandProcessor struct SCPFifoStruct { // fifo registers - volatile u32 CPBase; - volatile u32 CPEnd; + std::atomic CPBase; + std::atomic CPEnd; u32 CPHiWatermark; u32 CPLoWatermark; - volatile u32 CPReadWriteDistance; - volatile u32 CPWritePointer; - volatile u32 CPReadPointer; - volatile u32 CPBreakpoint; - volatile u32 SafeCPReadPointer; + std::atomic CPReadWriteDistance; + std::atomic CPWritePointer; + std::atomic CPReadPointer; + std::atomic CPBreakpoint; + std::atomic SafeCPReadPointer; volatile u32 bFF_GPLinkEnable; volatile u32 bFF_GPReadEnable; diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 5aede12b96..f12776449c 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -8,7 +8,6 @@ #include #include "Common/Assert.h" -#include "Common/Atomic.h" #include "Common/BlockingLoop.h" #include "Common/ChunkFile.h" #include "Common/Event.h" @@ -329,33 +328,37 @@ void RunGpuLoop() // check if we are able to run this buffer while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && - fifo.CPReadWriteDistance && !AtBreakpoint()) + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint()) { if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance) break; u32 cyclesExecuted = 0; - u32 readPtr = fifo.CPReadPointer; + u32 readPtr = fifo.CPReadPointer.load(std::memory_order_relaxed); ReadDataFromFifo(readPtr); - if (readPtr == fifo.CPEnd) - readPtr = fifo.CPBase; + if (readPtr == fifo.CPEnd.load(std::memory_order_relaxed)) + readPtr = fifo.CPBase.load(std::memory_order_relaxed); else readPtr += 32; - ASSERT_MSG(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0, + ASSERT_MSG(COMMANDPROCESSOR, + (s32)fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32 >= 0, "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce " "instability in the game. Please report it.", - fifo.CPReadWriteDistance - 32); + fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32); u8* write_ptr = s_video_buffer_write_ptr; s_video_buffer_read_ptr = OpcodeDecoder::Run( DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); - Common::AtomicStore(fifo.CPReadPointer, readPtr); - Common::AtomicAdd(fifo.CPReadWriteDistance, static_cast(-32)); + fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed); + fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst); if ((write_ptr - s_video_buffer_read_ptr) == 0) - Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); + { + fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed), + std::memory_order_relaxed); + } CommandProcessor::SetCPStatusFromGPU(); @@ -412,7 +415,8 @@ void GpuMaySleep() bool AtBreakpoint() { CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; - return fifo.bFF_BPEnable && (fifo.CPReadPointer == fifo.CPBreakpoint); + return fifo.bFF_BPEnable && (fifo.CPReadPointer.load(std::memory_order_relaxed) == + fifo.CPBreakpoint.load(std::memory_order_relaxed)); } void RunGpu() @@ -442,12 +446,12 @@ static int RunGpuOnCpu(int ticks) CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; bool reset_simd_state = false; int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load(); - while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && - available_ticks >= 0) + while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && + !AtBreakpoint() && available_ticks >= 0) { if (s_use_deterministic_gpu_thread) { - ReadDataFromFifoOnCPU(fifo.CPReadPointer); + ReadDataFromFifoOnCPU(fifo.CPReadPointer.load(std::memory_order_relaxed)); s_gpu_mainloop.Wakeup(); } else @@ -458,19 +462,25 @@ static int RunGpuOnCpu(int ticks) FPURoundMode::LoadDefaultSIMDState(); reset_simd_state = true; } - ReadDataFromFifo(fifo.CPReadPointer); + ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed)); u32 cycles = 0; s_video_buffer_read_ptr = OpcodeDecoder::Run( DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); available_ticks -= cycles; } - if (fifo.CPReadPointer == fifo.CPEnd) - fifo.CPReadPointer = fifo.CPBase; + if (fifo.CPReadPointer.load(std::memory_order_relaxed) == + fifo.CPEnd.load(std::memory_order_relaxed)) + { + fifo.CPReadPointer.store(fifo.CPBase.load(std::memory_order_relaxed), + std::memory_order_relaxed); + } else - fifo.CPReadPointer += 32; + { + fifo.CPReadPointer.fetch_add(32, std::memory_order_relaxed); + } - fifo.CPReadWriteDistance -= 32; + fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_relaxed); } CommandProcessor::SetCPStatusFromGPU(); diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 710e8c5e75..93f8fa2279 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -890,7 +890,9 @@ void Renderer::CheckFifoRecording() RecordVideoMemory(); } - FifoRecorder::GetInstance().EndFrame(CommandProcessor::fifo.CPBase, CommandProcessor::fifo.CPEnd); + FifoRecorder::GetInstance().EndFrame( + CommandProcessor::fifo.CPBase.load(std::memory_order_relaxed), + CommandProcessor::fifo.CPEnd.load(std::memory_order_relaxed)); } void Renderer::RecordVideoMemory()