diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 5e75c27b05..d9872dcc3b 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -480,11 +480,8 @@ void Idle() //When the FIFO is processing data we must not advance because in this way //the VI will be desynchronized. So, We are waiting until the FIFO finish and //while we process only the events required by the FIFO. - while (g_video_backend->Video_IsPossibleWaitingSetDrawDone()) - { - ProcessFifoWaitEvents(); - Common::YieldCPU(); - } + ProcessFifoWaitEvents(); + g_video_backend->Video_Sync(); } idledCycles += DowncountToCycles(PowerPC::ppcState.downcount); diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index 72d1dc8715..e69c3c1e78 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -199,6 +199,9 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate) static void ThrottleCallback(u64 last_time, int cyclesLate) { + // Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz. + CommandProcessor::s_gpuMaySleep.Set(); + u32 time = Common::Timer::GetTimeMs(); int diff = (u32)last_time - time; diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 497d26be00..784fe6d07b 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -360,11 +360,6 @@ void VideoSoftware::Video_GatherPipeBursted() SWCommandProcessor::GatherPipeBursted(); } -bool VideoSoftware::Video_IsPossibleWaitingSetDrawDone() -{ - return false; -} - void VideoSoftware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) { SWCommandProcessor::RegisterMMIO(mmio, base); diff --git a/Source/Core/VideoBackends/Software/VideoBackend.h b/Source/Core/VideoBackends/Software/VideoBackend.h index 0edb3bd4df..8697e27c98 100644 --- a/Source/Core/VideoBackends/Software/VideoBackend.h +++ b/Source/Core/VideoBackends/Software/VideoBackend.h @@ -45,7 +45,7 @@ class VideoSoftware : public VideoBackend void Video_SetRendering(bool bEnabled) override; void Video_GatherPipeBursted() override; - bool Video_IsPossibleWaitingSetDrawDone() override; + void Video_Sync() override {} void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override; diff --git a/Source/Core/VideoCommon/AsyncRequests.cpp b/Source/Core/VideoCommon/AsyncRequests.cpp index a6cc5a4b9c..d2945a3dbc 100644 --- a/Source/Core/VideoCommon/AsyncRequests.cpp +++ b/Source/Core/VideoCommon/AsyncRequests.cpp @@ -1,4 +1,5 @@ #include "VideoCommon/AsyncRequests.h" +#include "VideoCommon/Fifo.h" #include "VideoCommon/RenderBase.h" AsyncRequests AsyncRequests::s_singleton; @@ -49,6 +50,7 @@ void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking) m_queue.push(event); + RunGpu(); if (blocking) { m_cond.wait(lock, [this]{return m_queue.empty();}); diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index b53c50fc2e..2c6660e1f8 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -40,12 +40,13 @@ static u16 m_bboxright; static u16 m_bboxbottom; static u16 m_tokenReg; -volatile bool isPossibleWaitingSetDrawDone = false; volatile bool interruptSet= false; volatile bool interruptWaiting= false; volatile bool interruptTokenWaiting = false; volatile bool interruptFinishWaiting = false; +Common::Flag s_gpuMaySleep; + volatile u32 VITicks = CommandProcessor::m_cpClockOrigin; static bool IsOnThread() @@ -70,7 +71,6 @@ void DoState(PointerWrap &p) p.Do(m_tokenReg); p.Do(fifo); - p.Do(isPossibleWaitingSetDrawDone); p.Do(interruptSet); p.Do(interruptWaiting); p.Do(interruptTokenWaiting); @@ -123,8 +123,6 @@ void Init() interruptFinishWaiting = false; interruptTokenWaiting = false; - isPossibleWaitingSetDrawDone = false; - et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper); } @@ -319,13 +317,10 @@ void GatherPipeBursted() (ProcessorInterface::Fifo_CPUBase == fifo.CPBase) && fifo.CPReadWriteDistance > 0) { - ProcessFifoAllDistance(); + FlushGpu(); } } - else - { - RunGpu(); - } + RunGpu(); return; } @@ -375,6 +370,7 @@ void UpdateInterrupts(u64 userdata) } CoreTiming::ForceExceptionCheck(0); interruptWaiting = false; + RunGpu(); } void UpdateInterruptsFromVideoBackend(u64 userdata) @@ -470,15 +466,6 @@ void SetCPStatusFromCPU() } } -void ProcessFifoAllDistance() -{ - if (IsOnThread()) - { - while (!interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) - Common::YieldCPU(); - } -} - void ProcessFifoEvents() { if (IsOnThread() && (interruptWaiting || interruptFinishWaiting || interruptTokenWaiting)) @@ -520,7 +507,7 @@ void SetCpControlRegister() if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable) { fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable; - while (fifo.isGpuReadingData) Common::YieldCPU(); + FlushGpu(); } else { @@ -551,5 +538,7 @@ void Update() if (fifo.isGpuReadingData) Common::AtomicAdd(VITicks, SystemTimers::GetTicksPerSecond() / 10000); + + RunGpu(); } } // end of namespace CommandProcessor diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index 0dad1578af..6d177844a6 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -5,6 +5,7 @@ #pragma once #include "Common/CommonTypes.h" +#include "Common/Flag.h" #include "VideoCommon/VideoBackendBase.h" class PointerWrap; @@ -17,11 +18,11 @@ namespace CommandProcessor extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread. -extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread. extern volatile bool interruptSet; extern volatile bool interruptWaiting; extern volatile bool interruptTokenWaiting; extern volatile bool interruptFinishWaiting; +extern Common::Flag s_gpuMaySleep; // internal hardware addresses enum @@ -145,7 +146,6 @@ void UpdateInterruptsFromVideoBackend(u64 userdata); void SetCpClearRegister(); void SetCpControlRegister(); void SetCpStatusRegister(); -void ProcessFifoAllDistance(); void ProcessFifoEvents(); void Update(); diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 8c7dbce49b..c5e49fb583 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -5,6 +5,7 @@ #include "Common/Atomic.h" #include "Common/ChunkFile.h" #include "Common/CPUDetect.h" +#include "Common/Event.h" #include "Common/FPURoundMode.h" #include "Common/MemoryUtil.h" #include "Common/Thread.h" @@ -29,7 +30,6 @@ bool g_bSkipCurrentFrame = false; static volatile bool GpuRunningState = false; static volatile bool EmuRunningState = false; -static std::mutex m_csHWVidOccupied; // Most of this array is unlikely to be faulted in... static u8 s_fifo_aux_data[FIFO_SIZE]; @@ -58,6 +58,12 @@ static u8* s_video_buffer_pp_read_ptr; // polls, it's just atomic. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr. +static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again +static Common::Event s_gpu_new_work_event; + +static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do +static Common::Event s_gpu_done_event; + void Fifo_DoState(PointerWrap &p) { p.DoArray(s_video_buffer, FIFO_SIZE); @@ -79,16 +85,12 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock) { SyncGPU(SYNC_GPU_OTHER); EmulatorState(false); - if (!Core::IsGPUThread()) - m_csHWVidOccupied.lock(); - _dbg_assert_(COMMON, !CommandProcessor::fifo.isGpuReadingData); + FlushGpu(); } else { if (unpauseOnUnlock) EmulatorState(true); - if (!Core::IsGPUThread()) - m_csHWVidOccupied.unlock(); } } @@ -127,17 +129,18 @@ void ExitGpuLoop() { // This should break the wait loop in CPU thread CommandProcessor::fifo.bFF_GPReadEnable = false; - SCPFifoStruct &fifo = CommandProcessor::fifo; - while (fifo.isGpuReadingData) - Common::YieldCPU(); + FlushGpu(); + // Terminate GPU thread loop GpuRunningState = false; EmuRunningState = true; + s_gpu_new_work_event.Set(); } void EmulatorState(bool running) { EmuRunningState = running; + s_gpu_new_work_event.Set(); } void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) @@ -266,15 +269,10 @@ void ResetVideoBuffer() // Purpose: Keep the Core HW updated about the CPU-GPU distance void RunGpuLoop() { - std::lock_guard lk(m_csHWVidOccupied); GpuRunningState = true; SCPFifoStruct &fifo = CommandProcessor::fifo; u32 cyclesExecuted = 0; - // If the host CPU has only two cores, idle loop instead of busy loop - // This allows a system that we are maxing out in dual core mode to do other things - bool yield_cpu = cpu_info.num_cores <= 2; - AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetPassthrough(false); @@ -282,9 +280,10 @@ void RunGpuLoop() { g_video_backend->PeekMessages(); - AsyncRequests::GetInstance()->PullEvents(); - if (g_use_deterministic_gpu_thread) + if (g_use_deterministic_gpu_thread && EmuRunningState) { + AsyncRequests::GetInstance()->PullEvents(); + // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder. u8* seen_ptr = s_video_buffer_seen_ptr; u8* write_ptr = s_video_buffer_write_ptr; @@ -300,17 +299,23 @@ void RunGpuLoop() } } } - else + else if (EmuRunningState) { + AsyncRequests::GetInstance()->PullEvents(); + CommandProcessor::SetCPStatusFromGPU(); - Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin); + if (!fifo.isGpuReadingData) + { + Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin); + } + + bool run_loop = true; // check if we are able to run this buffer - while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) + while (run_loop && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { fifo.isGpuReadingData = true; - CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false; if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin) { @@ -338,6 +343,10 @@ void RunGpuLoop() if ((write_ptr - s_video_buffer_read_ptr) == 0) Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); } + else + { + run_loop = false; + } CommandProcessor::SetCPStatusFromGPU(); @@ -345,30 +354,28 @@ void RunGpuLoop() // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. AsyncRequests::GetInstance()->PullEvents(); - CommandProcessor::isPossibleWaitingSetDrawDone = false; } - fifo.isGpuReadingData = false; + // don't release the GPU running state on sync GPU waits + fifo.isGpuReadingData = !run_loop; } - if (EmuRunningState) + s_gpu_is_pending.Clear(); + s_gpu_done_event.Set(); + + if (s_gpu_is_running.IsSet()) { - // NOTE(jsd): Calling SwitchToThread() on Windows 7 x64 is a hot spot, according to profiler. - // See https://docs.google.com/spreadsheet/ccc?key=0Ah4nh0yGtjrgdFpDeF9pS3V6RUotRVE3S3J4TGM1NlE#gid=0 - // for benchmark details. - if (yield_cpu) - Common::YieldCPU(); + if (CommandProcessor::s_gpuMaySleep.IsSet()) + { + // Reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop + s_gpu_is_pending.Set(); + s_gpu_is_running.Clear(); + CommandProcessor::s_gpuMaySleep.Clear(); + } } else { - // While the emu is paused, we still handle async requests then sleep. - while (!EmuRunningState) - { - g_video_backend->PeekMessages(); - m_csHWVidOccupied.unlock(); - Common::SleepCurrentThread(1); - m_csHWVidOccupied.lock(); - } + s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100)); } } // wake up SyncGPU if we were interrupted @@ -377,6 +384,17 @@ void RunGpuLoop() AsyncRequests::GetInstance()->SetPassthrough(true); } +void FlushGpu() +{ + if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) + return; + + while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet()) + { + CommandProcessor::s_gpuMaySleep.Set(); + s_gpu_done_event.Wait(); + } +} bool AtBreakpoint() { @@ -386,41 +404,59 @@ bool AtBreakpoint() void RunGpu() { - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && - !g_use_deterministic_gpu_thread) - return; - SCPFifoStruct &fifo = CommandProcessor::fifo; - while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) + + // execute GPU + if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) { - if (g_use_deterministic_gpu_thread) + bool reset_simd_state = false; + while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) { - ReadDataFromFifoOnCPU(fifo.CPReadPointer); + if (g_use_deterministic_gpu_thread) + { + ReadDataFromFifoOnCPU(fifo.CPReadPointer); + } + else + { + if (!reset_simd_state) + { + FPURoundMode::SaveSIMDState(); + FPURoundMode::LoadDefaultSIMDState(); + reset_simd_state = true; + } + ReadDataFromFifo(fifo.CPReadPointer); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); + } + + //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); + + if (fifo.CPReadPointer == fifo.CPEnd) + fifo.CPReadPointer = fifo.CPBase; + else + fifo.CPReadPointer += 32; + + fifo.CPReadWriteDistance -= 32; } - else + CommandProcessor::SetCPStatusFromGPU(); + + if (reset_simd_state) { - FPURoundMode::SaveSIMDState(); - FPURoundMode::LoadDefaultSIMDState(); - ReadDataFromFifo(fifo.CPReadPointer); - s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); FPURoundMode::LoadSIMDState(); } - - //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); - - if (fifo.CPReadPointer == fifo.CPEnd) - fifo.CPReadPointer = fifo.CPBase; - else - fifo.CPReadPointer += 32; - - fifo.CPReadWriteDistance -= 32; } - CommandProcessor::SetCPStatusFromGPU(); + + // wake up GPU thread + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet()) + { + s_gpu_is_pending.Set(); + s_gpu_is_running.Set(); + s_gpu_new_work_event.Set(); + } } void Fifo_UpdateWantDeterminism(bool want) { - // We are paused (or not running at all yet) and have m_csHWVidOccupied, so + // We are paused (or not running at all yet), so // it should be safe to change this. const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter; bool gpu_thread = false; diff --git a/Source/Core/VideoCommon/Fifo.h b/Source/Core/VideoCommon/Fifo.h index 8cec0d824d..f8b4b5f625 100644 --- a/Source/Core/VideoCommon/Fifo.h +++ b/Source/Core/VideoCommon/Fifo.h @@ -41,6 +41,7 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true); void PushFifoAuxBuffer(void* ptr, size_t size); void* PopFifoAuxBuffer(size_t size); +void FlushGpu(); void RunGpu(); void RunGpuLoop(); void ExitGpuLoop(); diff --git a/Source/Core/VideoCommon/MainBase.cpp b/Source/Core/VideoCommon/MainBase.cpp index a8af71b31e..102e17db34 100644 --- a/Source/Core/VideoCommon/MainBase.cpp +++ b/Source/Core/VideoCommon/MainBase.cpp @@ -233,9 +233,9 @@ void VideoBackendHardware::Video_GatherPipeBursted() CommandProcessor::GatherPipeBursted(); } -bool VideoBackendHardware::Video_IsPossibleWaitingSetDrawDone() +void VideoBackendHardware::Video_Sync() { - return CommandProcessor::isPossibleWaitingSetDrawDone; + FlushGpu(); } void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) diff --git a/Source/Core/VideoCommon/PixelEngine.cpp b/Source/Core/VideoCommon/PixelEngine.cpp index afe08c7d0a..da086924b1 100644 --- a/Source/Core/VideoCommon/PixelEngine.cpp +++ b/Source/Core/VideoCommon/PixelEngine.cpp @@ -287,7 +287,6 @@ void SetFinish_OnMainThread(u64 userdata, int cyclesLate) Common::AtomicStore(*(volatile u32*)&g_bSignalFinishInterrupt, 1); UpdateInterrupts(); CommandProcessor::interruptFinishWaiting = false; - CommandProcessor::isPossibleWaitingSetDrawDone = false; } // SetToken diff --git a/Source/Core/VideoCommon/VideoBackendBase.h b/Source/Core/VideoCommon/VideoBackendBase.h index 4796a29b07..9aa6208e52 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.h +++ b/Source/Core/VideoCommon/VideoBackendBase.h @@ -99,7 +99,7 @@ public: virtual void Video_GatherPipeBursted() = 0; - virtual bool Video_IsPossibleWaitingSetDrawDone() = 0; + virtual void Video_Sync() = 0; // Registers MMIO handlers for the CommandProcessor registers. virtual void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) = 0; @@ -148,7 +148,7 @@ class VideoBackendHardware : public VideoBackend void Video_GatherPipeBursted() override; - bool Video_IsPossibleWaitingSetDrawDone() override; + void Video_Sync() override; void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;