From 5624dd6d3982aebab498dcc99dafbcfc00983b4f Mon Sep 17 00:00:00 2001 From: "Admiral H. Curtiss" Date: Fri, 9 Dec 2022 20:01:25 +0100 Subject: [PATCH] VideoCommon/Fifo: Refactor to class, move to Core::System. --- Source/Core/Core/Core.cpp | 18 +- Source/Core/Core/CoreTiming.cpp | 3 +- Source/Core/Core/HW/CPU.cpp | 3 +- Source/Core/Core/HW/SystemTimers.cpp | 2 +- Source/Core/Core/System.cpp | 7 + Source/Core/Core/System.h | 5 + Source/Core/DolphinQt/Host.cpp | 5 +- Source/Core/VideoCommon/AsyncRequests.cpp | 6 +- Source/Core/VideoCommon/BPStructs.cpp | 15 +- Source/Core/VideoCommon/CommandProcessor.cpp | 46 +-- Source/Core/VideoCommon/CommandProcessor.h | 4 +- Source/Core/VideoCommon/Fifo.cpp | 389 +++++++++---------- Source/Core/VideoCommon/Fifo.h | 125 ++++-- Source/Core/VideoCommon/OpcodeDecoding.cpp | 11 +- Source/Core/VideoCommon/PixelEngine.cpp | 3 +- Source/Core/VideoCommon/VideoBackendBase.cpp | 22 +- Source/Core/VideoCommon/VideoState.cpp | 4 +- Source/Core/VideoCommon/XFStructs.cpp | 9 +- 18 files changed, 377 insertions(+), 300 deletions(-) diff --git a/Source/Core/Core/Core.cpp b/Source/Core/Core/Core.cpp index 17eced199b..6dbc75468d 100644 --- a/Source/Core/Core/Core.cpp +++ b/Source/Core/Core/Core.cpp @@ -277,7 +277,9 @@ void Stop() // - Hammertime! // Dump left over jobs HostDispatchJobs(); - Fifo::EmulatorState(false); + auto& system = Core::System::GetInstance(); + + system.GetFifo().EmulatorState(false); INFO_LOG_FMT(CONSOLE, "Stop [Main Thread]\t\t---- Shutting down ----"); @@ -285,7 +287,7 @@ void Stop() // - Hammertime! INFO_LOG_FMT(CONSOLE, "{}", StopMessage(true, "Stop CPU")); CPU::Stop(); - if (Core::System::GetInstance().IsDualCoreMode()) + if (system.IsDualCoreMode()) { // Video_EnterLoop() should now exit so that EmuThread() // will continue concurrently with the rest of the commands @@ -597,7 +599,7 @@ static void EmuThread(std::unique_ptr boot, WindowSystemInfo wsi wiifs_guard.Dismiss(); // This adds the SyncGPU handler to CoreTiming, so now CoreTiming::Advance might block. - Fifo::Prepare(); + system.GetFifo().Prepare(); // Setup our core if (Config::Get(Config::MAIN_CPU_CORE) != PowerPC::CPUCore::Interpreter) @@ -622,7 +624,7 @@ static void EmuThread(std::unique_ptr boot, WindowSystemInfo wsi s_cpu_thread = std::thread(cpuThreadFunc, savestate_path, delete_savestate); // become the GPU thread - Fifo::RunGpuLoop(); + system.GetFifo().RunGpuLoop(); // We have now exited the Video Loop INFO_LOG_FMT(CONSOLE, "{}", StopMessage(false, "Video Loop Ended")); @@ -766,7 +768,8 @@ static bool PauseAndLock(bool do_lock, bool unpause_on_unlock) // video has to come after CPU, because CPU thread can wait for video thread // (s_efbAccessRequested). - Fifo::PauseAndLock(do_lock, false); + auto& system = Core::System::GetInstance(); + system.GetFifo().PauseAndLock(do_lock, false); ResetRumble(); @@ -1029,7 +1032,10 @@ void UpdateWantDeterminism(bool initial) const auto ios = IOS::HLE::GetIOS(); if (ios) ios->UpdateWantDeterminism(new_want_determinism); - Fifo::UpdateWantDeterminism(new_want_determinism); + + auto& system = Core::System::GetInstance(); + system.GetFifo().UpdateWantDeterminism(new_want_determinism); + // We need to clear the cache because some parts of the JIT depend on want_determinism, // e.g. use of FMA. JitInterface::ClearCache(); diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 4bdd53df31..a25700f6ad 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -354,7 +354,8 @@ void CoreTimingManager::Idle() // When the FIFO is processing data we must not advance because in this way // the VI will be desynchronized. So, We are waiting until the FIFO finish and // while we process only the events required by the FIFO. - Fifo::FlushGpu(); + auto& system = Core::System::GetInstance(); + system.GetFifo().FlushGpu(); } PowerPC::UpdatePerformanceMonitor(PowerPC::ppcState.downcount, 0, 0); diff --git a/Source/Core/Core/HW/CPU.cpp b/Source/Core/Core/HW/CPU.cpp index 4345954eac..05898b1f8c 100644 --- a/Source/Core/Core/HW/CPU.cpp +++ b/Source/Core/Core/HW/CPU.cpp @@ -191,7 +191,8 @@ void Run() static void RunAdjacentSystems(bool running) { // NOTE: We're assuming these will not try to call Break or EnableStepping. - Fifo::EmulatorState(running); + auto& system = Core::System::GetInstance(); + system.GetFifo().EmulatorState(running); // Core is responsible for shutting down the sound stream. if (s_state != State::PowerDown) AudioCommon::SetSoundStreamRunning(Core::System::GetInstance(), running); diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index ff3b408920..d7908f10e1 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -173,7 +173,7 @@ void PatchEngineCallback(Core::System& system, u64 userdata, s64 cycles_late) void ThrottleCallback(Core::System& system, u64 deadline, s64 cyclesLate) { // Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz. - Fifo::GpuMaySleep(); + system.GetFifo().GpuMaySleep(); const u64 time = Common::Timer::NowUs(); diff --git a/Source/Core/Core/System.cpp b/Source/Core/Core/System.cpp index 79903815a4..e0547eb3c4 100644 --- a/Source/Core/Core/System.cpp +++ b/Source/Core/Core/System.cpp @@ -19,6 +19,7 @@ #include "Core/HW/Sram.h" #include "Core/HW/VideoInterface.h" #include "VideoCommon/CommandProcessor.h" +#include "VideoCommon/Fifo.h" namespace Core { @@ -35,6 +36,7 @@ struct System::Impl DVDInterface::DVDInterfaceState m_dvd_interface_state; DVDThread::DVDThreadState m_dvd_thread_state; ExpansionInterface::ExpansionInterfaceState m_expansion_interface_state; + Fifo::FifoManager m_fifo; Memory::MemoryManager m_memory; MemoryInterface::MemoryInterfaceState m_memory_interface_state; SerialInterface::SerialInterfaceState m_serial_interface_state; @@ -120,6 +122,11 @@ ExpansionInterface::ExpansionInterfaceState& System::GetExpansionInterfaceState( return m_impl->m_expansion_interface_state; } +Fifo::FifoManager& System::GetFifo() const +{ + return m_impl->m_fifo; +} + Memory::MemoryManager& System::GetMemory() const { return m_impl->m_memory; diff --git a/Source/Core/Core/System.h b/Source/Core/Core/System.h index cd498ae6bb..70ce7d587c 100644 --- a/Source/Core/Core/System.h +++ b/Source/Core/Core/System.h @@ -36,6 +36,10 @@ namespace ExpansionInterface { class ExpansionInterfaceState; }; +namespace Fifo +{ +class FifoManager; +} namespace Memory { class MemoryManager; @@ -94,6 +98,7 @@ public: DVDInterface::DVDInterfaceState& GetDVDInterfaceState() const; DVDThread::DVDThreadState& GetDVDThreadState() const; ExpansionInterface::ExpansionInterfaceState& GetExpansionInterfaceState() const; + Fifo::FifoManager& GetFifo() const; Memory::MemoryManager& GetMemory() const; MemoryInterface::MemoryInterfaceState& GetMemoryInterfaceState() const; SerialInterface::SerialInterfaceState& GetSerialInterfaceState() const; diff --git a/Source/Core/DolphinQt/Host.cpp b/Source/Core/DolphinQt/Host.cpp index 5ccd10189c..0b7207cb49 100644 --- a/Source/Core/DolphinQt/Host.cpp +++ b/Source/Core/DolphinQt/Host.cpp @@ -114,9 +114,10 @@ static void RunWithGPUThreadInactive(std::function f) // the CPU and GPU threads are the same thread, and we already checked for the GPU thread.) const bool was_running = Core::GetState() == Core::State::Running; - Fifo::PauseAndLock(true, was_running); + auto& fifo = Core::System::GetInstance().GetFifo(); + fifo.PauseAndLock(true, was_running); f(); - Fifo::PauseAndLock(false, was_running); + fifo.PauseAndLock(false, was_running); } else { diff --git a/Source/Core/VideoCommon/AsyncRequests.cpp b/Source/Core/VideoCommon/AsyncRequests.cpp index 0371683376..7e3b329ad6 100644 --- a/Source/Core/VideoCommon/AsyncRequests.cpp +++ b/Source/Core/VideoCommon/AsyncRequests.cpp @@ -5,6 +5,7 @@ #include +#include "Core/System.h" #include "VideoCommon/Fifo.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" @@ -90,7 +91,8 @@ void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking) m_queue.push(event); - Fifo::RunGpu(); + auto& system = Core::System::GetInstance(); + system.GetFifo().RunGpu(); if (blocking) { m_cond.wait(lock, [this] { return m_queue.empty(); }); @@ -159,7 +161,7 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e) break; case Event::FIFO_RESET: - Fifo::ResetVideoBuffer(); + Core::System::GetInstance().GetFifo().ResetVideoBuffer(); break; case Event::PERF_QUERY: diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 1bc9f69150..cc390a5d79 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -179,14 +179,17 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) switch (bp.newvalue & 0xFF) { case 0x02: + { INCSTAT(g_stats.this_frame.num_draw_done); g_texture_cache->FlushEFBCopies(); g_framebuffer_manager->InvalidatePeekCache(false); g_framebuffer_manager->RefreshPeekCache(); - if (!Fifo::UseDeterministicGPUThread()) + auto& system = Core::System::GetInstance(); + if (!system.GetFifo().UseDeterministicGPUThread()) PixelEngine::SetFinish(cycles_into_future); // may generate interrupt DEBUG_LOG_FMT(VIDEO, "GXSetDrawDone SetPEFinish (value: {:#04X})", bp.newvalue & 0xFFFF); return; + } default: WARN_LOG_FMT(VIDEO, "GXSetDrawDone ??? (value {:#04X})", bp.newvalue & 0xFFFF); @@ -194,23 +197,29 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) } return; case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID + { INCSTAT(g_stats.this_frame.num_token); g_texture_cache->FlushEFBCopies(); g_framebuffer_manager->InvalidatePeekCache(false); g_framebuffer_manager->RefreshPeekCache(); - if (!Fifo::UseDeterministicGPUThread()) + auto& system = Core::System::GetInstance(); + if (!system.GetFifo().UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), false, cycles_into_future); DEBUG_LOG_FMT(VIDEO, "SetPEToken {:#06X}", bp.newvalue & 0xFFFF); return; + } case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID + { INCSTAT(g_stats.this_frame.num_token_int); g_texture_cache->FlushEFBCopies(); g_framebuffer_manager->InvalidatePeekCache(false); g_framebuffer_manager->RefreshPeekCache(); - if (!Fifo::UseDeterministicGPUThread()) + auto& system = Core::System::GetInstance(); + if (!system.GetFifo().UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), true, cycles_into_future); DEBUG_LOG_FMT(VIDEO, "SetPEToken + INT {:#06X}", bp.newvalue & 0xFFFF); return; + } // ------------------------ // EFB copy command. This copies a rectangle from the EFB to either RAM in a texture format or to diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 7d31b1d406..663539a678 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -223,8 +223,8 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping* mmio->Register(base | STATUS_REGISTER, MMIO::ComplexRead([](Core::System& system, u32) { auto& cp = system.GetCommandProcessor(); - Fifo::SyncGPUForRegisterAccess(); - cp.SetCpStatusRegister(); + system.GetFifo().SyncGPUForRegisterAccess(); + cp.SetCpStatusRegister(system); return cp.m_cp_status_reg.Hex; }), MMIO::InvalidWrite()); @@ -234,8 +234,8 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping* auto& cp = system.GetCommandProcessor(); UCPCtrlReg tmp(val); cp.m_cp_ctrl_reg.Hex = tmp.Hex; - cp.SetCpControlRegister(); - Fifo::RunGpu(); + cp.SetCpControlRegister(system); + system.GetFifo().RunGpu(); })); mmio->Register(base | CLEAR_REGISTER, MMIO::DirectRead(&m_cp_clear_reg.Hex), @@ -244,7 +244,7 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping* UCPClearReg tmp(val); cp.m_cp_clear_reg.Hex = tmp.Hex; cp.SetCpClearRegister(); - Fifo::RunGpu(); + system.GetFifo().RunGpu(); })); mmio->Register(base | PERF_SELECT, MMIO::InvalidRead(), MMIO::Nop()); @@ -284,7 +284,7 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping* { fifo_rw_distance_hi_r = MMIO::ComplexRead([](Core::System& system, u32) { const auto& fifo = system.GetCommandProcessor().GetFifo(); - Fifo::SyncGPUForRegisterAccess(); + system.GetFifo().SyncGPUForRegisterAccess(); if (fifo.CPWritePointer.load(std::memory_order_relaxed) >= fifo.SafeCPReadPointer.load(std::memory_order_relaxed)) { @@ -306,16 +306,16 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping* { fifo_rw_distance_hi_r = MMIO::ComplexRead([](Core::System& system, u32) { const auto& fifo = system.GetCommandProcessor().GetFifo(); - Fifo::SyncGPUForRegisterAccess(); + system.GetFifo().SyncGPUForRegisterAccess(); return fifo.CPReadWriteDistance.load(std::memory_order_relaxed) >> 16; }); } mmio->Register(base | FIFO_RW_DISTANCE_HI, fifo_rw_distance_hi_r, MMIO::ComplexWrite([WMASK_HI_RESTRICT](Core::System& system, u32, u16 val) { auto& fifo = system.GetCommandProcessor().GetFifo(); - Fifo::SyncGPUForRegisterAccess(); + system.GetFifo().SyncGPUForRegisterAccess(); WriteHigh(fifo.CPReadWriteDistance, val & WMASK_HI_RESTRICT); - Fifo::RunGpu(); + system.GetFifo().RunGpu(); })); mmio->Register( @@ -330,12 +330,12 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping* { fifo_read_hi_r = MMIO::ComplexRead([](Core::System& system, u32) { auto& fifo = system.GetCommandProcessor().GetFifo(); - Fifo::SyncGPUForRegisterAccess(); + system.GetFifo().SyncGPUForRegisterAccess(); return fifo.SafeCPReadPointer.load(std::memory_order_relaxed) >> 16; }); fifo_read_hi_w = MMIO::ComplexWrite([WMASK_HI_RESTRICT](Core::System& sys, u32, u16 val) { auto& fifo = sys.GetCommandProcessor().GetFifo(); - Fifo::SyncGPUForRegisterAccess(); + sys.GetFifo().SyncGPUForRegisterAccess(); WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT); fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed), std::memory_order_relaxed); @@ -345,12 +345,12 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping* { fifo_read_hi_r = MMIO::ComplexRead([](Core::System& system, u32) { const auto& fifo = system.GetCommandProcessor().GetFifo(); - Fifo::SyncGPUForRegisterAccess(); + system.GetFifo().SyncGPUForRegisterAccess(); return fifo.CPReadPointer.load(std::memory_order_relaxed) >> 16; }); fifo_read_hi_w = MMIO::ComplexWrite([WMASK_HI_RESTRICT](Core::System& sys, u32, u16 val) { auto& fifo = sys.GetCommandProcessor().GetFifo(); - Fifo::SyncGPUForRegisterAccess(); + sys.GetFifo().SyncGPUForRegisterAccess(); WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT); }); } @@ -366,7 +366,7 @@ void CommandProcessorManager::GatherPipeBursted(Core::System& system) // if we aren't linked, we don't care about gather pipe data if (!m_cp_ctrl_reg.GPLinkEnable) { - if (IsOnThread(system) && !Fifo::UseDeterministicGPUThread()) + if (IsOnThread(system) && !system.GetFifo().UseDeterministicGPUThread()) { // In multibuffer mode is not allowed write in the same FIFO attached to the GPU. // Fix Pokemon XD in DC mode. @@ -374,10 +374,10 @@ void CommandProcessorManager::GatherPipeBursted(Core::System& system) (ProcessorInterface::Fifo_CPUBase == fifo.CPBase.load(std::memory_order_relaxed)) && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > 0) { - Fifo::FlushGpu(); + system.GetFifo().FlushGpu(); } } - Fifo::RunGpu(); + system.GetFifo().RunGpu(); return; } @@ -405,7 +405,7 @@ void CommandProcessorManager::GatherPipeBursted(Core::System& system) fifo.CPReadWriteDistance.fetch_add(GPFifo::GATHER_PIPE_SIZE, std::memory_order_seq_cst); - Fifo::RunGpu(); + system.GetFifo().RunGpu(); ASSERT_MSG(COMMANDPROCESSOR, fifo.CPReadWriteDistance.load(std::memory_order_relaxed) <= @@ -442,12 +442,12 @@ void CommandProcessorManager::UpdateInterrupts(Core::System& system, u64 userdat } system.GetCoreTiming().ForceExceptionCheck(0); m_interrupt_waiting.Clear(); - Fifo::RunGpu(); + system.GetFifo().RunGpu(); } void CommandProcessorManager::UpdateInterruptsFromVideoBackend(Core::System& system, u64 userdata) { - if (!Fifo::UseDeterministicGPUThread()) + if (!system.GetFifo().UseDeterministicGPUThread()) { system.GetCoreTiming().ScheduleEvent(0, m_event_type_update_interrupts, userdata, CoreTiming::FromThread::NON_CPU); @@ -573,7 +573,7 @@ void CommandProcessorManager::SetCPStatusFromCPU(Core::System& system) } } -void CommandProcessorManager::SetCpStatusRegister() +void CommandProcessorManager::SetCpStatusRegister(Core::System& system) { const auto& fifo = m_fifo; @@ -583,7 +583,7 @@ void CommandProcessorManager::SetCpStatusRegister() (fifo.CPReadPointer.load(std::memory_order_relaxed) == fifo.CPWritePointer.load(std::memory_order_relaxed)); m_cp_status_reg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) || - Fifo::AtBreakpoint() || + system.GetFifo().AtBreakpoint() || !fifo.bFF_GPReadEnable.load(std::memory_order_relaxed); m_cp_status_reg.UnderflowLoWatermark = fifo.bFF_LoWatermark.load(std::memory_order_relaxed); m_cp_status_reg.OverflowHiWatermark = fifo.bFF_HiWatermark.load(std::memory_order_relaxed); @@ -597,7 +597,7 @@ void CommandProcessorManager::SetCpStatusRegister() m_cp_status_reg.UnderflowLoWatermark ? "ON" : "OFF"); } -void CommandProcessorManager::SetCpControlRegister() +void CommandProcessorManager::SetCpControlRegister(Core::System& system) { auto& fifo = m_fifo; @@ -610,7 +610,7 @@ void CommandProcessorManager::SetCpControlRegister() if (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) && !m_cp_ctrl_reg.GPReadEnable) { fifo.bFF_GPReadEnable.store(m_cp_ctrl_reg.GPReadEnable, std::memory_order_relaxed); - Fifo::FlushGpu(); + system.GetFifo().FlushGpu(); } else { diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index a54aa2a42e..34b1e8701f 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -174,8 +174,8 @@ public: bool IsInterruptWaiting() const; void SetCpClearRegister(); - void SetCpControlRegister(); - void SetCpStatusRegister(); + void SetCpControlRegister(Core::System& system); + void SetCpStatusRegister(Core::System& system); void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess); diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 617a9d73f7..d222157b93 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -34,88 +34,47 @@ namespace Fifo { -static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024; static constexpr int GPU_TIME_SLOT_SIZE = 1000; -static Common::BlockingLoop s_gpu_mainloop; +FifoManager::FifoManager() = default; +FifoManager::~FifoManager() = default; -static Common::Flag s_emu_running_state; - -// Most of this array is unlikely to be faulted in... -static u8 s_fifo_aux_data[FIFO_SIZE]; -static u8* s_fifo_aux_write_ptr; -static u8* s_fifo_aux_read_ptr; - -// This could be in SConfig, but it depends on multiple settings -// and can change at runtime. -static bool s_use_deterministic_gpu_thread; - -static CoreTiming::EventType* s_event_sync_gpu; - -// STATE_TO_SAVE -static u8* s_video_buffer; -static u8* s_video_buffer_read_ptr; -static std::atomic s_video_buffer_write_ptr; -static std::atomic s_video_buffer_seen_ptr; -static u8* s_video_buffer_pp_read_ptr; -// The read_ptr is always owned by the GPU thread. In normal mode, so is the -// write_ptr, despite it being atomic. In deterministic GPU thread mode, -// things get a bit more complicated: -// - The seen_ptr is written by the GPU thread, and points to what it's already -// processed as much of as possible - in the case of a partial command which -// caused it to stop, not the same as the read ptr. It's written by the GPU, -// under the lock, and updating the cond. -// - The write_ptr is written by the CPU thread after it copies data from the -// FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop -// polls, it's just atomic. -// - The pp_read_ptr is the CPU preprocessing version of the read_ptr. - -static std::atomic s_sync_ticks; -static bool s_syncing_suspended; -static Common::Event s_sync_wakeup_event; - -static std::optional s_config_callback_id = std::nullopt; -static bool s_config_sync_gpu = false; -static int s_config_sync_gpu_max_distance = 0; -static int s_config_sync_gpu_min_distance = 0; -static float s_config_sync_gpu_overclock = 0.0f; - -static void RefreshConfig() +void FifoManager::RefreshConfig() { - s_config_sync_gpu = Config::Get(Config::MAIN_SYNC_GPU); - s_config_sync_gpu_max_distance = Config::Get(Config::MAIN_SYNC_GPU_MAX_DISTANCE); - s_config_sync_gpu_min_distance = Config::Get(Config::MAIN_SYNC_GPU_MIN_DISTANCE); - s_config_sync_gpu_overclock = Config::Get(Config::MAIN_SYNC_GPU_OVERCLOCK); + m_config_sync_gpu = Config::Get(Config::MAIN_SYNC_GPU); + m_config_sync_gpu_max_distance = Config::Get(Config::MAIN_SYNC_GPU_MAX_DISTANCE); + m_config_sync_gpu_min_distance = Config::Get(Config::MAIN_SYNC_GPU_MIN_DISTANCE); + m_config_sync_gpu_overclock = Config::Get(Config::MAIN_SYNC_GPU_OVERCLOCK); } -void DoState(PointerWrap& p) +void FifoManager::DoState(PointerWrap& p) { - p.DoArray(s_video_buffer, FIFO_SIZE); - u8* write_ptr = s_video_buffer_write_ptr; - p.DoPointer(write_ptr, s_video_buffer); - s_video_buffer_write_ptr = write_ptr; - p.DoPointer(s_video_buffer_read_ptr, s_video_buffer); - if (p.IsReadMode() && s_use_deterministic_gpu_thread) + p.DoArray(m_video_buffer, FIFO_SIZE); + u8* write_ptr = m_video_buffer_write_ptr; + p.DoPointer(write_ptr, m_video_buffer); + m_video_buffer_write_ptr = write_ptr; + p.DoPointer(m_video_buffer_read_ptr, m_video_buffer); + if (p.IsReadMode() && m_use_deterministic_gpu_thread) { // We're good and paused, right? - s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr; + m_video_buffer_seen_ptr = m_video_buffer_pp_read_ptr = m_video_buffer_read_ptr; } - p.Do(s_sync_ticks); - p.Do(s_syncing_suspended); + p.Do(m_sync_ticks); + p.Do(m_syncing_suspended); } -void PauseAndLock(bool doLock, bool unpauseOnUnlock) +void FifoManager::PauseAndLock(bool doLock, bool unpauseOnUnlock) { if (doLock) { SyncGPU(SyncGPUReason::Other); EmulatorState(false); - if (!Core::System::GetInstance().IsDualCoreMode() || s_use_deterministic_gpu_thread) + if (!Core::System::GetInstance().IsDualCoreMode() || m_use_deterministic_gpu_thread) return; - s_gpu_mainloop.WaitYield(std::chrono::milliseconds(100), Host_YieldToUI); + m_gpu_mainloop.WaitYield(std::chrono::milliseconds(100), Host_YieldToUI); } else { @@ -124,44 +83,44 @@ void PauseAndLock(bool doLock, bool unpauseOnUnlock) } } -void Init() +void FifoManager::Init() { - if (!s_config_callback_id) - s_config_callback_id = Config::AddConfigChangedCallback(RefreshConfig); + if (!m_config_callback_id) + m_config_callback_id = Config::AddConfigChangedCallback([this] { RefreshConfig(); }); RefreshConfig(); // Padded so that SIMD overreads in the vertex loader are safe - s_video_buffer = static_cast(Common::AllocateMemoryPages(FIFO_SIZE + 4)); + m_video_buffer = static_cast(Common::AllocateMemoryPages(FIFO_SIZE + 4)); ResetVideoBuffer(); if (Core::System::GetInstance().IsDualCoreMode()) - s_gpu_mainloop.Prepare(); - s_sync_ticks.store(0); + m_gpu_mainloop.Prepare(); + m_sync_ticks.store(0); } -void Shutdown() +void FifoManager::Shutdown() { - if (s_gpu_mainloop.IsRunning()) + if (m_gpu_mainloop.IsRunning()) PanicAlertFmt("FIFO shutting down while active"); - Common::FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4); - s_video_buffer = nullptr; - s_video_buffer_write_ptr = nullptr; - s_video_buffer_pp_read_ptr = nullptr; - s_video_buffer_read_ptr = nullptr; - s_video_buffer_seen_ptr = nullptr; - s_fifo_aux_write_ptr = nullptr; - s_fifo_aux_read_ptr = nullptr; + Common::FreeMemoryPages(m_video_buffer, FIFO_SIZE + 4); + m_video_buffer = nullptr; + m_video_buffer_write_ptr = nullptr; + m_video_buffer_pp_read_ptr = nullptr; + m_video_buffer_read_ptr = nullptr; + m_video_buffer_seen_ptr = nullptr; + m_fifo_aux_write_ptr = nullptr; + m_fifo_aux_read_ptr = nullptr; - if (s_config_callback_id) + if (m_config_callback_id) { - Config::RemoveConfigChangedCallback(*s_config_callback_id); - s_config_callback_id = std::nullopt; + Config::RemoveConfigChangedCallback(*m_config_callback_id); + m_config_callback_id = std::nullopt; } } // May be executed from any thread, even the graphics thread. // Created to allow for self shutdown. -void ExitGpuLoop() +void FifoManager::ExitGpuLoop() { auto& system = Core::System::GetInstance(); auto& command_processor = system.GetCommandProcessor(); @@ -172,68 +131,68 @@ void ExitGpuLoop() FlushGpu(); // Terminate GPU thread loop - s_emu_running_state.Set(); - s_gpu_mainloop.Stop(s_gpu_mainloop.kNonBlock); + m_emu_running_state.Set(); + m_gpu_mainloop.Stop(m_gpu_mainloop.kNonBlock); } -void EmulatorState(bool running) +void FifoManager::EmulatorState(bool running) { - s_emu_running_state.Set(running); + m_emu_running_state.Set(running); if (running) - s_gpu_mainloop.Wakeup(); + m_gpu_mainloop.Wakeup(); else - s_gpu_mainloop.AllowSleep(); + m_gpu_mainloop.AllowSleep(); } -void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) +void FifoManager::SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) { - if (s_use_deterministic_gpu_thread) + if (m_use_deterministic_gpu_thread) { - s_gpu_mainloop.Wait(); - if (!s_gpu_mainloop.IsRunning()) + m_gpu_mainloop.Wait(); + if (!m_gpu_mainloop.IsRunning()) return; // Opportunistically reset FIFOs so we don't wrap around. - if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr) + if (may_move_read_ptr && m_fifo_aux_write_ptr != m_fifo_aux_read_ptr) { - PanicAlertFmt("Aux FIFO not synced ({}, {})", fmt::ptr(s_fifo_aux_write_ptr), - fmt::ptr(s_fifo_aux_read_ptr)); + PanicAlertFmt("Aux FIFO not synced ({}, {})", fmt::ptr(m_fifo_aux_write_ptr), + fmt::ptr(m_fifo_aux_read_ptr)); } - memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr); - s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data); - s_fifo_aux_read_ptr = s_fifo_aux_data; + memmove(m_fifo_aux_data, m_fifo_aux_read_ptr, m_fifo_aux_write_ptr - m_fifo_aux_read_ptr); + m_fifo_aux_write_ptr -= (m_fifo_aux_read_ptr - m_fifo_aux_data); + m_fifo_aux_read_ptr = m_fifo_aux_data; if (may_move_read_ptr) { - u8* write_ptr = s_video_buffer_write_ptr; + u8* write_ptr = m_video_buffer_write_ptr; // what's left over in the buffer - size_t size = write_ptr - s_video_buffer_pp_read_ptr; + size_t size = write_ptr - m_video_buffer_pp_read_ptr; - memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size); + memmove(m_video_buffer, m_video_buffer_pp_read_ptr, size); // This change always decreases the pointers. We write seen_ptr // after write_ptr here, and read it before in RunGpuLoop, so // 'write_ptr > seen_ptr' there cannot become spuriously true. - s_video_buffer_write_ptr = write_ptr = s_video_buffer + size; - s_video_buffer_pp_read_ptr = s_video_buffer; - s_video_buffer_read_ptr = s_video_buffer; - s_video_buffer_seen_ptr = write_ptr; + m_video_buffer_write_ptr = write_ptr = m_video_buffer + size; + m_video_buffer_pp_read_ptr = m_video_buffer; + m_video_buffer_read_ptr = m_video_buffer; + m_video_buffer_seen_ptr = write_ptr; } } } -void PushFifoAuxBuffer(const void* ptr, size_t size) +void FifoManager::PushFifoAuxBuffer(const void* ptr, size_t size) { - if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr)) + if (size > (size_t)(m_fifo_aux_data + FIFO_SIZE - m_fifo_aux_write_ptr)) { SyncGPU(SyncGPUReason::AuxSpace, /* may_move_read_ptr */ false); - if (!s_gpu_mainloop.IsRunning()) + if (!m_gpu_mainloop.IsRunning()) { // GPU is shutting down return; } - if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr)) + if (size > (size_t)(m_fifo_aux_data + FIFO_SIZE - m_fifo_aux_write_ptr)) { // That will sync us up to the last 32 bytes, so this short region // of FIFO would have to point to a 2MB display list or something. @@ -241,63 +200,63 @@ void PushFifoAuxBuffer(const void* ptr, size_t size) return; } } - memcpy(s_fifo_aux_write_ptr, ptr, size); - s_fifo_aux_write_ptr += size; + memcpy(m_fifo_aux_write_ptr, ptr, size); + m_fifo_aux_write_ptr += size; } -void* PopFifoAuxBuffer(size_t size) +void* FifoManager::PopFifoAuxBuffer(size_t size) { - void* ret = s_fifo_aux_read_ptr; - s_fifo_aux_read_ptr += size; + void* ret = m_fifo_aux_read_ptr; + m_fifo_aux_read_ptr += size; return ret; } // Description: RunGpuLoop() sends data through this function. -static void ReadDataFromFifo(u32 readPtr) +void FifoManager::ReadDataFromFifo(u32 readPtr) { if (GPFifo::GATHER_PIPE_SIZE > - static_cast(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr)) + static_cast(m_video_buffer + FIFO_SIZE - m_video_buffer_write_ptr)) { - const size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr; + const size_t existing_len = m_video_buffer_write_ptr - m_video_buffer_read_ptr; if (GPFifo::GATHER_PIPE_SIZE > static_cast(FIFO_SIZE - existing_len)) { PanicAlertFmt("FIFO out of bounds (existing {} + new {} > {})", existing_len, GPFifo::GATHER_PIPE_SIZE, FIFO_SIZE); return; } - memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len); - s_video_buffer_write_ptr = s_video_buffer + existing_len; - s_video_buffer_read_ptr = s_video_buffer; + memmove(m_video_buffer, m_video_buffer_read_ptr, existing_len); + m_video_buffer_write_ptr = m_video_buffer + existing_len; + m_video_buffer_read_ptr = m_video_buffer; } - // Copy new video instructions to s_video_buffer for future use in rendering the new picture + // Copy new video instructions to m_video_buffer for future use in rendering the new picture auto& system = Core::System::GetInstance(); auto& memory = system.GetMemory(); - memory.CopyFromEmu(s_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE); - s_video_buffer_write_ptr += GPFifo::GATHER_PIPE_SIZE; + memory.CopyFromEmu(m_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE); + m_video_buffer_write_ptr += GPFifo::GATHER_PIPE_SIZE; } // The deterministic_gpu_thread version. -static void ReadDataFromFifoOnCPU(u32 readPtr) +void FifoManager::ReadDataFromFifoOnCPU(u32 readPtr) { - u8* write_ptr = s_video_buffer_write_ptr; - if (GPFifo::GATHER_PIPE_SIZE > static_cast(s_video_buffer + FIFO_SIZE - write_ptr)) + u8* write_ptr = m_video_buffer_write_ptr; + if (GPFifo::GATHER_PIPE_SIZE > static_cast(m_video_buffer + FIFO_SIZE - write_ptr)) { // We can't wrap around while the GPU is working on the data. // This should be very rare due to the reset in SyncGPU. SyncGPU(SyncGPUReason::Wraparound); - if (!s_gpu_mainloop.IsRunning()) + if (!m_gpu_mainloop.IsRunning()) { // GPU is shutting down, so the next asserts may fail return; } - if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr) + if (m_video_buffer_pp_read_ptr != m_video_buffer_read_ptr) { PanicAlertFmt("Desynced read pointers"); return; } - write_ptr = s_video_buffer_write_ptr; - const size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr; + write_ptr = m_video_buffer_write_ptr; + const size_t existing_len = write_ptr - m_video_buffer_pp_read_ptr; if (GPFifo::GATHER_PIPE_SIZE > static_cast(FIFO_SIZE - existing_len)) { PanicAlertFmt("FIFO out of bounds (existing {} + new {} > {})", existing_len, @@ -307,50 +266,50 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) } auto& system = Core::System::GetInstance(); auto& memory = system.GetMemory(); - memory.CopyFromEmu(s_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE); - s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo( - DataReader(s_video_buffer_pp_read_ptr, write_ptr + GPFifo::GATHER_PIPE_SIZE), nullptr); + memory.CopyFromEmu(m_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE); + m_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo( + DataReader(m_video_buffer_pp_read_ptr, write_ptr + GPFifo::GATHER_PIPE_SIZE), nullptr); // This would have to be locked if the GPU thread didn't spin. - s_video_buffer_write_ptr = write_ptr + GPFifo::GATHER_PIPE_SIZE; + m_video_buffer_write_ptr = write_ptr + GPFifo::GATHER_PIPE_SIZE; } -void ResetVideoBuffer() +void FifoManager::ResetVideoBuffer() { - s_video_buffer_read_ptr = s_video_buffer; - s_video_buffer_write_ptr = s_video_buffer; - s_video_buffer_seen_ptr = s_video_buffer; - s_video_buffer_pp_read_ptr = s_video_buffer; - s_fifo_aux_write_ptr = s_fifo_aux_data; - s_fifo_aux_read_ptr = s_fifo_aux_data; + m_video_buffer_read_ptr = m_video_buffer; + m_video_buffer_write_ptr = m_video_buffer; + m_video_buffer_seen_ptr = m_video_buffer; + m_video_buffer_pp_read_ptr = m_video_buffer; + m_fifo_aux_write_ptr = m_fifo_aux_data; + m_fifo_aux_read_ptr = m_fifo_aux_data; } // Description: Main FIFO update loop // Purpose: Keep the Core HW updated about the CPU-GPU distance -void RunGpuLoop() +void FifoManager::RunGpuLoop() { AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetPassthrough(false); - s_gpu_mainloop.Run( - [] { + m_gpu_mainloop.Run( + [this] { // Run events from the CPU thread. AsyncRequests::GetInstance()->PullEvents(); // Do nothing while paused - if (!s_emu_running_state.IsSet()) + if (!m_emu_running_state.IsSet()) return; - if (s_use_deterministic_gpu_thread) + if (m_use_deterministic_gpu_thread) { // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder. - u8* seen_ptr = s_video_buffer_seen_ptr; - u8* write_ptr = s_video_buffer_write_ptr; + u8* seen_ptr = m_video_buffer_seen_ptr; + u8* write_ptr = m_video_buffer_write_ptr; // See comment in SyncGPU if (write_ptr > seen_ptr) { - s_video_buffer_read_ptr = - OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr); - s_video_buffer_seen_ptr = write_ptr; + m_video_buffer_read_ptr = + OpcodeDecoder::RunFifo(DataReader(m_video_buffer_read_ptr, write_ptr), nullptr); + m_video_buffer_seen_ptr = write_ptr; } } else @@ -365,7 +324,7 @@ void RunGpuLoop() fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint()) { - if (s_config_sync_gpu && s_sync_ticks.load() < s_config_sync_gpu_min_distance) + if (m_config_sync_gpu && m_sync_ticks.load() < m_config_sync_gpu_min_distance) break; u32 cyclesExecuted = 0; @@ -385,13 +344,13 @@ void RunGpuLoop() "instability in the game. Please report it.", distance); - u8* write_ptr = s_video_buffer_write_ptr; - s_video_buffer_read_ptr = OpcodeDecoder::RunFifo( - DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted); + u8* write_ptr = m_video_buffer_write_ptr; + m_video_buffer_read_ptr = OpcodeDecoder::RunFifo( + DataReader(m_video_buffer_read_ptr, write_ptr), &cyclesExecuted); fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed); fifo.CPReadWriteDistance.fetch_sub(GPFifo::GATHER_PIPE_SIZE, std::memory_order_seq_cst); - if ((write_ptr - s_video_buffer_read_ptr) == 0) + if ((write_ptr - m_video_buffer_read_ptr) == 0) { fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed), std::memory_order_relaxed); @@ -399,13 +358,15 @@ void RunGpuLoop() command_processor.SetCPStatusFromGPU(system); - if (s_config_sync_gpu) + if (m_config_sync_gpu) { - cyclesExecuted = (int)(cyclesExecuted / s_config_sync_gpu_overclock); - int old = s_sync_ticks.fetch_sub(cyclesExecuted); - if (old >= s_config_sync_gpu_max_distance && - old - (int)cyclesExecuted < s_config_sync_gpu_max_distance) - s_sync_wakeup_event.Set(); + cyclesExecuted = (int)(cyclesExecuted / m_config_sync_gpu_overclock); + int old = m_sync_ticks.fetch_sub(cyclesExecuted); + if (old >= m_config_sync_gpu_max_distance && + old - (int)cyclesExecuted < m_config_sync_gpu_max_distance) + { + m_sync_wakeup_event.Set(); + } } // This call is pretty important in DualCore mode and must be called in the FIFO Loop. @@ -416,11 +377,11 @@ void RunGpuLoop() } // fast skip remaining GPU time if fifo is empty - if (s_sync_ticks.load() > 0) + if (m_sync_ticks.load() > 0) { - int old = s_sync_ticks.exchange(0); - if (old >= s_config_sync_gpu_max_distance) - s_sync_wakeup_event.Set(); + int old = m_sync_ticks.exchange(0); + if (old >= m_config_sync_gpu_max_distance) + m_sync_wakeup_event.Set(); } // The fifo is empty and it's unlikely we will get any more work in the near future. @@ -435,20 +396,20 @@ void RunGpuLoop() AsyncRequests::GetInstance()->SetPassthrough(true); } -void FlushGpu() +void FifoManager::FlushGpu() { - if (!Core::System::GetInstance().IsDualCoreMode() || s_use_deterministic_gpu_thread) + if (!Core::System::GetInstance().IsDualCoreMode() || m_use_deterministic_gpu_thread) return; - s_gpu_mainloop.Wait(); + m_gpu_mainloop.Wait(); } -void GpuMaySleep() +void FifoManager::GpuMaySleep() { - s_gpu_mainloop.AllowSleep(); + m_gpu_mainloop.AllowSleep(); } -bool AtBreakpoint() +bool FifoManager::AtBreakpoint() const { auto& system = Core::System::GetInstance(); auto& command_processor = system.GetCommandProcessor(); @@ -458,44 +419,44 @@ bool AtBreakpoint() fifo.CPBreakpoint.load(std::memory_order_relaxed)); } -void RunGpu() +void FifoManager::RunGpu() { auto& system = Core::System::GetInstance(); const bool is_dual_core = system.IsDualCoreMode(); // wake up GPU thread - if (is_dual_core && !s_use_deterministic_gpu_thread) + if (is_dual_core && !m_use_deterministic_gpu_thread) { - s_gpu_mainloop.Wakeup(); + m_gpu_mainloop.Wakeup(); } // if the sync GPU callback is suspended, wake it up. - if (!is_dual_core || s_use_deterministic_gpu_thread || s_config_sync_gpu) + if (!is_dual_core || m_use_deterministic_gpu_thread || m_config_sync_gpu) { - if (s_syncing_suspended) + if (m_syncing_suspended) { - s_syncing_suspended = false; - system.GetCoreTiming().ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu, + m_syncing_suspended = false; + system.GetCoreTiming().ScheduleEvent(GPU_TIME_SLOT_SIZE, m_event_sync_gpu, GPU_TIME_SLOT_SIZE); } } } -static int RunGpuOnCpu(int ticks) +int FifoManager::RunGpuOnCpu(int ticks) { auto& system = Core::System::GetInstance(); auto& command_processor = system.GetCommandProcessor(); auto& fifo = command_processor.GetFifo(); bool reset_simd_state = false; - int available_ticks = int(ticks * s_config_sync_gpu_overclock) + s_sync_ticks.load(); + int available_ticks = int(ticks * m_config_sync_gpu_overclock) + m_sync_ticks.load(); while (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint() && available_ticks >= 0) { - if (s_use_deterministic_gpu_thread) + if (m_use_deterministic_gpu_thread) { ReadDataFromFifoOnCPU(fifo.CPReadPointer.load(std::memory_order_relaxed)); - s_gpu_mainloop.Wakeup(); + m_gpu_mainloop.Wakeup(); } else { @@ -507,8 +468,8 @@ static int RunGpuOnCpu(int ticks) } ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed)); u32 cycles = 0; - s_video_buffer_read_ptr = OpcodeDecoder::RunFifo( - DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles); + m_video_buffer_read_ptr = OpcodeDecoder::RunFifo( + DataReader(m_video_buffer_read_ptr, m_video_buffer_write_ptr), &cycles); available_ticks -= cycles; } @@ -534,7 +495,7 @@ static int RunGpuOnCpu(int ticks) } // Discard all available ticks as there is nothing to do any more. - s_sync_ticks.store(std::min(available_ticks, 0)); + m_sync_ticks.store(std::min(available_ticks, 0)); // If the GPU is idle, drop the handler. if (available_ticks >= 0) @@ -544,7 +505,7 @@ static int RunGpuOnCpu(int ticks) return -available_ticks + GPU_TIME_SLOT_SIZE; } -void UpdateWantDeterminism(bool want) +void FifoManager::UpdateWantDeterminism(bool want) { // We are paused (or not running at all yet), so // it should be safe to change this. @@ -564,87 +525,83 @@ void UpdateWantDeterminism(bool want) gpu_thread = gpu_thread && Core::System::GetInstance().IsDualCoreMode(); - if (s_use_deterministic_gpu_thread != gpu_thread) + if (m_use_deterministic_gpu_thread != gpu_thread) { - s_use_deterministic_gpu_thread = gpu_thread; + m_use_deterministic_gpu_thread = gpu_thread; if (gpu_thread) { // These haven't been updated in non-deterministic mode. - s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr; + m_video_buffer_seen_ptr = m_video_buffer_pp_read_ptr = m_video_buffer_read_ptr; CopyPreprocessCPStateFromMain(); VertexLoaderManager::MarkAllDirty(); } } } -bool UseDeterministicGPUThread() -{ - return s_use_deterministic_gpu_thread; -} - /* This function checks the emulated CPU - GPU distance and may wake up the GPU, * or block the CPU if required. It should be called by the CPU thread regularly. * @ticks The gone emulated CPU time. * @return A good time to call WaitForGpuThread() next. */ -static int WaitForGpuThread(int ticks) +int FifoManager::WaitForGpuThread(int ticks) { - int old = s_sync_ticks.fetch_add(ticks); + int old = m_sync_ticks.fetch_add(ticks); int now = old + ticks; // GPU is idle, so stop polling. - if (old >= 0 && s_gpu_mainloop.IsDone()) + if (old >= 0 && m_gpu_mainloop.IsDone()) return -1; // Wakeup GPU - if (old < s_config_sync_gpu_min_distance && now >= s_config_sync_gpu_min_distance) + if (old < m_config_sync_gpu_min_distance && now >= m_config_sync_gpu_min_distance) RunGpu(); // If the GPU is still sleeping, wait for a longer time - if (now < s_config_sync_gpu_min_distance) - return GPU_TIME_SLOT_SIZE + s_config_sync_gpu_min_distance - now; + if (now < m_config_sync_gpu_min_distance) + return GPU_TIME_SLOT_SIZE + m_config_sync_gpu_min_distance - now; // Wait for GPU - if (now >= s_config_sync_gpu_max_distance) - s_sync_wakeup_event.Wait(); + if (now >= m_config_sync_gpu_max_distance) + m_sync_wakeup_event.Wait(); return GPU_TIME_SLOT_SIZE; } -static void SyncGPUCallback(Core::System& system, u64 ticks, s64 cyclesLate) +void FifoManager::SyncGPUCallback(Core::System& system, u64 ticks, s64 cyclesLate) { ticks += cyclesLate; int next = -1; - if (!system.IsDualCoreMode() || s_use_deterministic_gpu_thread) + auto& fifo = system.GetFifo(); + if (!system.IsDualCoreMode() || fifo.m_use_deterministic_gpu_thread) { - next = RunGpuOnCpu((int)ticks); + next = fifo.RunGpuOnCpu((int)ticks); } - else if (s_config_sync_gpu) + else if (fifo.m_config_sync_gpu) { - next = WaitForGpuThread((int)ticks); + next = fifo.WaitForGpuThread((int)ticks); } - s_syncing_suspended = next < 0; - if (!s_syncing_suspended) - system.GetCoreTiming().ScheduleEvent(next, s_event_sync_gpu, next); + fifo.m_syncing_suspended = next < 0; + if (!fifo.m_syncing_suspended) + system.GetCoreTiming().ScheduleEvent(next, fifo.m_event_sync_gpu, next); } -void SyncGPUForRegisterAccess() +void FifoManager::SyncGPUForRegisterAccess() { SyncGPU(SyncGPUReason::Other); - if (!Core::System::GetInstance().IsDualCoreMode() || s_use_deterministic_gpu_thread) + if (!Core::System::GetInstance().IsDualCoreMode() || m_use_deterministic_gpu_thread) RunGpuOnCpu(GPU_TIME_SLOT_SIZE); - else if (s_config_sync_gpu) + else if (m_config_sync_gpu) WaitForGpuThread(GPU_TIME_SLOT_SIZE); } // Initialize GPU - CPU thread syncing, this gives us a deterministic way to start the GPU thread. -void Prepare() +void FifoManager::Prepare() { - s_event_sync_gpu = + m_event_sync_gpu = Core::System::GetInstance().GetCoreTiming().RegisterEvent("SyncGPUCallback", SyncGPUCallback); - s_syncing_suspended = true; + m_syncing_suspended = true; } } // namespace Fifo diff --git a/Source/Core/VideoCommon/Fifo.h b/Source/Core/VideoCommon/Fifo.h index 6b640bf7ca..ab028a5c7f 100644 --- a/Source/Core/VideoCommon/Fifo.h +++ b/Source/Core/VideoCommon/Fifo.h @@ -3,21 +3,28 @@ #pragma once +#include #include +#include + +#include "Common/BlockingLoop.h" #include "Common/CommonTypes.h" +#include "Common/Event.h" +#include "Common/Flag.h" class PointerWrap; +namespace Core +{ +class System; +} +namespace CoreTiming +{ +struct EventType; +} + namespace Fifo { -void Init(); -void Shutdown(); -void Prepare(); // Must be called from the CPU thread. -void DoState(PointerWrap& f); -void PauseAndLock(bool doLock, bool unpauseOnUnlock); -void UpdateWantDeterminism(bool want); -bool UseDeterministicGPUThread(); - // Used for diagnostics. enum class SyncGPUReason { @@ -29,23 +36,95 @@ enum class SyncGPUReason Swap, AuxSpace, }; -// In deterministic GPU thread mode this waits for the GPU to be done with pending work. -void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true); -// In single core mode, this runs the GPU for a single slice. -// In dual core mode, this synchronizes with the GPU thread. -void SyncGPUForRegisterAccess(); +class FifoManager final +{ +public: + FifoManager(); + FifoManager(const FifoManager& other) = delete; + FifoManager(FifoManager&& other) = delete; + FifoManager& operator=(const FifoManager& other) = delete; + FifoManager& operator=(FifoManager&& other) = delete; + ~FifoManager(); -void PushFifoAuxBuffer(const void* ptr, size_t size); -void* PopFifoAuxBuffer(size_t size); + void Init(); + void Shutdown(); + void Prepare(); // Must be called from the CPU thread. + void DoState(PointerWrap& f); + void PauseAndLock(bool doLock, bool unpauseOnUnlock); + void UpdateWantDeterminism(bool want); + bool UseDeterministicGPUThread() const { return m_use_deterministic_gpu_thread; } -void FlushGpu(); -void RunGpu(); -void GpuMaySleep(); -void RunGpuLoop(); -void ExitGpuLoop(); -void EmulatorState(bool running); -bool AtBreakpoint(); -void ResetVideoBuffer(); + // In deterministic GPU thread mode this waits for the GPU to be done with pending work. + void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true); + // In single core mode, this runs the GPU for a single slice. + // In dual core mode, this synchronizes with the GPU thread. + void SyncGPUForRegisterAccess(); + + void PushFifoAuxBuffer(const void* ptr, size_t size); + void* PopFifoAuxBuffer(size_t size); + + void FlushGpu(); + void RunGpu(); + void GpuMaySleep(); + void RunGpuLoop(); + void ExitGpuLoop(); + void EmulatorState(bool running); + bool AtBreakpoint() const; + void ResetVideoBuffer(); + +private: + void RefreshConfig(); + void ReadDataFromFifo(u32 readPtr); + void ReadDataFromFifoOnCPU(u32 readPtr); + int RunGpuOnCpu(int ticks); + int WaitForGpuThread(int ticks); + static void SyncGPUCallback(Core::System& system, u64 ticks, s64 cyclesLate); + + static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024; + + Common::BlockingLoop m_gpu_mainloop; + + Common::Flag m_emu_running_state; + + // Most of this array is unlikely to be faulted in... + u8 m_fifo_aux_data[FIFO_SIZE]{}; + u8* m_fifo_aux_write_ptr = nullptr; + u8* m_fifo_aux_read_ptr = nullptr; + + // This could be in SConfig, but it depends on multiple settings + // and can change at runtime. + bool m_use_deterministic_gpu_thread = false; + + CoreTiming::EventType* m_event_sync_gpu = nullptr; + + // STATE_TO_SAVE + u8* m_video_buffer = nullptr; + u8* m_video_buffer_read_ptr = nullptr; + std::atomic m_video_buffer_write_ptr = nullptr; + std::atomic m_video_buffer_seen_ptr = nullptr; + u8* m_video_buffer_pp_read_ptr = nullptr; + // The read_ptr is always owned by the GPU thread. In normal mode, so is the + // write_ptr, despite it being atomic. In deterministic GPU thread mode, + // things get a bit more complicated: + // - The seen_ptr is written by the GPU thread, and points to what it's already + // processed as much of as possible - in the case of a partial command which + // caused it to stop, not the same as the read ptr. It's written by the GPU, + // under the lock, and updating the cond. + // - The write_ptr is written by the CPU thread after it copies data from the + // FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop + // polls, it's just atomic. + // - The pp_read_ptr is the CPU preprocessing version of the read_ptr. + + std::atomic m_sync_ticks = 0; + bool m_syncing_suspended = false; + Common::Event m_sync_wakeup_event; + + std::optional m_config_callback_id = std::nullopt; + bool m_config_sync_gpu = false; + int m_config_sync_gpu_max_distance = 0; + int m_config_sync_gpu_min_distance = 0; + float m_config_sync_gpu_overclock = 0.0f; +}; } // namespace Fifo diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index d14bb46e6c..3cf16eb8ad 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -151,13 +151,14 @@ public: { m_in_display_list = true; + auto& system = Core::System::GetInstance(); + if constexpr (is_preprocess) { - auto& system = Core::System::GetInstance(); auto& memory = system.GetMemory(); const u8* const start_address = memory.GetPointer(address); - Fifo::PushFifoAuxBuffer(start_address, size); + system.GetFifo().PushFifoAuxBuffer(start_address, size); if (start_address != nullptr) { @@ -168,13 +169,13 @@ public: { const u8* start_address; - if (Fifo::UseDeterministicGPUThread()) + auto& fifo = system.GetFifo(); + if (fifo.UseDeterministicGPUThread()) { - start_address = static_cast(Fifo::PopFifoAuxBuffer(size)); + start_address = static_cast(fifo.PopFifoAuxBuffer(size)); } else { - auto& system = Core::System::GetInstance(); auto& memory = system.GetMemory(); start_address = memory.GetPointer(address); } diff --git a/Source/Core/VideoCommon/PixelEngine.cpp b/Source/Core/VideoCommon/PixelEngine.cpp index 3623104c76..2684585ca7 100644 --- a/Source/Core/VideoCommon/PixelEngine.cpp +++ b/Source/Core/VideoCommon/PixelEngine.cpp @@ -333,7 +333,8 @@ static void RaiseEvent(int cycles_into_future) CoreTiming::FromThread from = CoreTiming::FromThread::NON_CPU; s64 cycles = 0; // we don't care about timings for dual core mode. - if (!Core::System::GetInstance().IsDualCoreMode() || Fifo::UseDeterministicGPUThread()) + auto& system = Core::System::GetInstance(); + if (!system.IsDualCoreMode() || system.GetFifo().UseDeterministicGPUThread()) { from = CoreTiming::FromThread::CPU; diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index 8e23c02dab..29915c2803 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -83,7 +83,8 @@ std::string VideoBackendBase::BadShaderFilename(const char* shader_stage, int co void VideoBackendBase::Video_ExitLoop() { - Fifo::ExitGpuLoop(); + auto& system = Core::System::GetInstance(); + system.GetFifo().ExitGpuLoop(); } // Run from the CPU thread (from VideoInterface.cpp) @@ -92,7 +93,8 @@ void VideoBackendBase::Video_OutputXFB(u32 xfb_addr, u32 fb_width, u32 fb_stride { if (m_initialized && g_renderer && !g_ActiveConfig.bImmediateXFB) { - Fifo::SyncGPU(Fifo::SyncGPUReason::Swap); + auto& system = Core::System::GetInstance(); + system.GetFifo().SyncGPU(Fifo::SyncGPUReason::Swap); AsyncRequests::Event e; e.time = ticks; @@ -147,7 +149,8 @@ u32 VideoBackendBase::Video_GetQueryResult(PerfQueryType type) return 0; } - Fifo::SyncGPU(Fifo::SyncGPUReason::PerfQuery); + auto& system = Core::System::GetInstance(); + system.GetFifo().SyncGPU(Fifo::SyncGPUReason::PerfQuery); AsyncRequests::Event e; e.time = 0; @@ -185,7 +188,8 @@ u16 VideoBackendBase::Video_GetBoundingBox(int index) warn_once = false; } - Fifo::SyncGPU(Fifo::SyncGPUReason::BBox); + auto& system = Core::System::GetInstance(); + system.GetFifo().SyncGPU(Fifo::SyncGPUReason::BBox); AsyncRequests::Event e; u16 result; @@ -291,7 +295,8 @@ void VideoBackendBase::PopulateBackendInfoFromUI() void VideoBackendBase::DoState(PointerWrap& p) { - if (!Core::System::GetInstance().IsDualCoreMode()) + auto& system = Core::System::GetInstance(); + if (!system.IsDualCoreMode()) { VideoCommon_DoState(p); return; @@ -304,7 +309,7 @@ void VideoBackendBase::DoState(PointerWrap& p) // Let the GPU thread sleep after loading the state, so we're not spinning if paused after loading // a state. The next GP burst will wake it up again. - Fifo::GpuMaySleep(); + system.GetFifo().GpuMaySleep(); } void VideoBackendBase::InitializeShared() @@ -319,7 +324,7 @@ void VideoBackendBase::InitializeShared() auto& system = Core::System::GetInstance(); auto& command_processor = system.GetCommandProcessor(); command_processor.Init(system); - Fifo::Init(); + system.GetFifo().Init(); PixelEngine::Init(); BPInit(); VertexLoaderManager::Init(); @@ -336,6 +341,7 @@ void VideoBackendBase::ShutdownShared() { m_initialized = false; + auto& system = Core::System::GetInstance(); VertexLoaderManager::Clear(); - Fifo::Shutdown(); + system.GetFifo().Shutdown(); } diff --git a/Source/Core/VideoCommon/VideoState.cpp b/Source/Core/VideoCommon/VideoState.cpp index ce76727a0b..9279e7f72c 100644 --- a/Source/Core/VideoCommon/VideoState.cpp +++ b/Source/Core/VideoCommon/VideoState.cpp @@ -60,10 +60,10 @@ void VideoCommon_DoState(PointerWrap& p) p.DoMarker("TMEM"); // FIFO - Fifo::DoState(p); + auto& system = Core::System::GetInstance(); + system.GetFifo().DoState(p); p.DoMarker("Fifo"); - auto& system = Core::System::GetInstance(); auto& command_processor = system.GetCommandProcessor(); command_processor.DoState(p); p.DoMarker("CommandProcessor"); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index 716b29aef2..96ab0cd444 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -257,13 +257,14 @@ void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size) u32* currData = (u32*)(&xfmem) + address; u32* newData; - if (Fifo::UseDeterministicGPUThread()) + auto& system = Core::System::GetInstance(); + auto& fifo = system.GetFifo(); + if (fifo.UseDeterministicGPUThread()) { - newData = (u32*)Fifo::PopFifoAuxBuffer(size * sizeof(u32)); + newData = (u32*)fifo.PopFifoAuxBuffer(size * sizeof(u32)); } else { - auto& system = Core::System::GetInstance(); auto& memory = system.GetMemory(); newData = (u32*)memory.GetPointer(g_main_cp_state.array_bases[array] + g_main_cp_state.array_strides[array] * index); @@ -293,7 +294,7 @@ void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size) g_preprocess_cp_state.array_strides[array] * index); const size_t buf_size = size * sizeof(u32); - Fifo::PushFifoAuxBuffer(new_data, buf_size); + system.GetFifo().PushFifoAuxBuffer(new_data, buf_size); } std::pair GetXFRegInfo(u32 address, u32 value)