diff --git a/Source/Core/Common/BlockingLoop.h b/Source/Core/Common/BlockingLoop.h index 8b5b397013..7d4fec495b 100644 --- a/Source/Core/Common/BlockingLoop.h +++ b/Source/Core/Common/BlockingLoop.h @@ -54,7 +54,7 @@ public: void Wait() { // already done - if (m_stopped.IsSet() || m_running_state.load() <= STATE_DONE) + if (IsDone()) return; // notifying this event will only wake up one thread, so use a mutex here to @@ -63,7 +63,7 @@ public: std::lock_guard lk(m_wait_lock); // Wait for the worker thread to finish. - while (!m_stopped.IsSet() && m_running_state.load() > STATE_DONE) + while (!IsDone()) { m_done_event.Wait(); } @@ -183,6 +183,11 @@ public: return !m_stopped.IsSet() && !m_shutdown.IsSet(); } + bool IsDone() const + { + return m_stopped.IsSet() || m_running_state.load() <= STATE_DONE; + } + // This function should be triggered regularly over time so // that we will fall back from the busy loop to sleeping. void AllowSleep() diff --git a/Source/Core/Core/ConfigManager.cpp b/Source/Core/Core/ConfigManager.cpp index 9439e3a228..7d7323bd54 100644 --- a/Source/Core/Core/ConfigManager.cpp +++ b/Source/Core/Core/ConfigManager.cpp @@ -182,6 +182,10 @@ void SConfig::SaveCoreSettings(IniFile& ini) core->Set("DSPHLE", m_LocalCoreStartupParameter.bDSPHLE); core->Set("SkipIdle", m_LocalCoreStartupParameter.bSkipIdle); core->Set("SyncOnSkipIdle", m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack); + core->Set("SyncGPU", m_LocalCoreStartupParameter.bSyncGPU); + core->Set("SyncGpuMaxDistance", m_LocalCoreStartupParameter.iSyncGpuMaxDistance); + core->Set("SyncGpuMinDistance", m_LocalCoreStartupParameter.iSyncGpuMinDistance); + core->Set("SyncGpuOverclock", m_LocalCoreStartupParameter.fSyncGpuOverclock); core->Set("DefaultISO", m_LocalCoreStartupParameter.m_strDefaultISO); core->Set("DVDRoot", m_LocalCoreStartupParameter.m_strDVDRoot); core->Set("Apploader", m_LocalCoreStartupParameter.m_strApploader); @@ -458,6 +462,9 @@ void SConfig::LoadCoreSettings(IniFile& ini) 
core->Get("MMU", &m_LocalCoreStartupParameter.bMMU, false); core->Get("BBDumpPort", &m_LocalCoreStartupParameter.iBBDumpPort, -1); core->Get("SyncGPU", &m_LocalCoreStartupParameter.bSyncGPU, false); + core->Get("SyncGpuMaxDistance", &m_LocalCoreStartupParameter.iSyncGpuMaxDistance, 200000); + core->Get("SyncGpuMinDistance", &m_LocalCoreStartupParameter.iSyncGpuMinDistance, -200000); + core->Get("SyncGpuOverclock", &m_LocalCoreStartupParameter.fSyncGpuOverclock, 1.0); core->Get("FastDiscSpeed", &m_LocalCoreStartupParameter.bFastDiscSpeed, false); core->Get("DCBZ", &m_LocalCoreStartupParameter.bDCBZOFF, false); core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default diff --git a/Source/Core/Core/CoreParameter.h b/Source/Core/Core/CoreParameter.h index 43d758bbad..4431deff73 100644 --- a/Source/Core/Core/CoreParameter.h +++ b/Source/Core/Core/CoreParameter.h @@ -184,9 +184,13 @@ struct SCoreStartupParameter bool bMMU; bool bDCBZOFF; int iBBDumpPort; - bool bSyncGPU; bool bFastDiscSpeed; + bool bSyncGPU; + int iSyncGpuMaxDistance; + int iSyncGpuMinDistance; + float fSyncGpuOverclock; + int SelectedLanguage; bool bWii; diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 8ee739caad..103785dbcf 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -482,13 +482,13 @@ void Idle() { //DEBUG_LOG(POWERPC, "Idle"); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack && !SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack) { //When the FIFO is processing data we must not advance because in this way //the VI will be desynchronized. So, We are waiting until the FIFO finish and //while we process only the events required by the FIFO. 
ProcessFifoWaitEvents(); - g_video_backend->Video_Sync(); + g_video_backend->Video_Sync(0); } idledCycles += DowncountToCycles(PowerPC::ppcState.downcount); diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index a44614d9e0..317a96a39c 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -61,11 +61,9 @@ IPC_HLE_PERIOD: For the Wiimote this is the call schedule: #include "Core/IPC_HLE/WII_IPC_HLE.h" #include "Core/PowerPC/PowerPC.h" -#include "VideoCommon/CommandProcessor.h" #include "VideoCommon/Fifo.h" #include "VideoCommon/VideoBackendBase.h" - namespace SystemTimers { @@ -81,15 +79,14 @@ static int et_IPC_HLE; static int et_PatchEngine; // PatchEngine updates every 1/60th of a second by default static int et_Throttle; +static u64 s_last_sync_gpu_tick; + // These are badly educated guesses // Feel free to experiment. Set these in Init below. static int // This is a fixed value, don't change it AUDIO_DMA_PERIOD, - // Regulates the speed of the Command Processor - CP_PERIOD, - // This is completely arbitrary. If we find that we need lower latency, we can just // increase this number. 
IPC_HLE_PERIOD; @@ -140,8 +137,12 @@ static void SICallback(u64 userdata, int cyclesLate) static void CPCallback(u64 userdata, int cyclesLate) { - CommandProcessor::Update(); - CoreTiming::ScheduleEvent(CP_PERIOD - cyclesLate, et_CP); + u64 now = CoreTiming::GetTicks(); + int next = g_video_backend->Video_Sync((int)(now - s_last_sync_gpu_tick)); + s_last_sync_gpu_tick = now; + + if (next > 0) + CoreTiming::ScheduleEvent(next, et_CP); } static void DecrementerCallback(u64 userdata, int cyclesLate) @@ -239,9 +240,6 @@ void Init() // System internal sample rate is fixed at 32KHz * 4 (16bit Stereo) / 32 bytes DMA AUDIO_DMA_PERIOD = CPU_CORE_CLOCK / (AudioInterface::GetAIDSampleRate() * 4 / 32); - // Emulated gekko <-> flipper bus speed ratio (CPU clock / flipper clock) - CP_PERIOD = GetTicksPerSecond() / 10000; - Common::Timer::IncreaseResolution(); // store and convert localtime at boot to timebase ticks CoreTiming::SetFakeTBStartValue((u64)(CPU_CORE_CLOCK / TIMER_RATIO) * (u64)CEXIIPL::GetGCTime()); @@ -253,7 +251,7 @@ void Init() et_Dec = CoreTiming::RegisterEvent("DecCallback", DecrementerCallback); et_VI = CoreTiming::RegisterEvent("VICallback", VICallback); et_SI = CoreTiming::RegisterEvent("SICallback", SICallback); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) et_CP = CoreTiming::RegisterEvent("CPCallback", CPCallback); et_DSP = CoreTiming::RegisterEvent("DSPCallback", DSPCallback); et_AudioDMA = CoreTiming::RegisterEvent("AudioDMACallback", AudioDMACallback); @@ -266,8 +264,9 @@ void Init() CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerFrame(), et_SI); CoreTiming::ScheduleEvent(AUDIO_DMA_PERIOD, et_AudioDMA); CoreTiming::ScheduleEvent(0, et_Throttle, Common::Timer::GetTimeMs()); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) - CoreTiming::ScheduleEvent(CP_PERIOD, et_CP); + if 
(SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) + CoreTiming::ScheduleEvent(0, et_CP); + s_last_sync_gpu_tick = CoreTiming::GetTicks(); CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerFrame(), et_PatchEngine); diff --git a/Source/Core/VideoBackends/Software/VideoBackend.h b/Source/Core/VideoBackends/Software/VideoBackend.h index 2fe2f2323b..c7c502995b 100644 --- a/Source/Core/VideoBackends/Software/VideoBackend.h +++ b/Source/Core/VideoBackends/Software/VideoBackend.h @@ -49,7 +49,7 @@ class VideoSoftware : public VideoBackend void Video_SetRendering(bool bEnabled) override; void Video_GatherPipeBursted() override; - void Video_Sync() override {} + int Video_Sync(int ticks) override { return 0; } void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override; diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 88b16afe7e..e16484de4d 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -16,7 +16,6 @@ #include "Core/HW/Memmap.h" #include "Core/HW/MMIO.h" #include "Core/HW/ProcessorInterface.h" -#include "Core/HW/SystemTimers.h" #include "VideoCommon/CommandProcessor.h" #include "VideoCommon/Fifo.h" #include "VideoCommon/PixelEngine.h" @@ -47,8 +46,6 @@ static std::atomic s_interrupt_waiting; static std::atomic s_interrupt_token_waiting; static std::atomic s_interrupt_finish_waiting; -static std::atomic s_vi_ticks(CommandProcessor::m_cpClockOrigin); - static bool IsOnThread() { return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread; @@ -546,30 +543,4 @@ void SetCpClearRegister() { } -void Update() -{ - while (s_vi_ticks.load() > m_cpClockOrigin && fifo.isGpuReadingData && IsOnThread()) - Common::YieldCPU(); - - if (fifo.isGpuReadingData) - s_vi_ticks.fetch_add(SystemTimers::GetTicksPerSecond() / 10000); - - RunGpu(); -} - -u32 GetVITicks() -{ - return 
s_vi_ticks.load(); -} - -void SetVITicks(u32 ticks) -{ - s_vi_ticks.store(ticks); -} - -void DecrementVITicks(u32 ticks) -{ - s_vi_ticks.fetch_sub(ticks); -} - } // end of namespace CommandProcessor diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index ee130b82b6..d5061efd02 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -121,9 +121,6 @@ union UCPClearReg UCPClearReg(u16 _hex) {Hex = _hex; } }; -// Can be any number, low enough to not be below the number of clocks executed by the GPU per CP_PERIOD -const static u32 m_cpClockOrigin = 200000; - // Init void Init(); void Shutdown(); @@ -146,10 +143,4 @@ void SetCpControlRegister(); void SetCpStatusRegister(); void ProcessFifoEvents(); -void Update(); - -u32 GetVITicks(); -void SetVITicks(u32 ticks); -void DecrementVITicks(u32 ticks); - } // namespace CommandProcessor diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 048916f28c..cf1e51219f 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -61,6 +61,9 @@ static u8* s_video_buffer_pp_read_ptr; // polls, it's just atomic. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr. 
+static std::atomic<int> s_sync_ticks; +static Common::Event s_sync_wakeup_event; + void Fifo_DoState(PointerWrap &p) { p.DoArray(s_video_buffer, FIFO_SIZE); @@ -99,7 +102,7 @@ void Fifo_Init() ResetVideoBuffer(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) s_gpu_mainloop.Prepare(); - CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin); + s_sync_ticks.store(0); } void Fifo_Shutdown() @@ -282,6 +285,8 @@ void RunGpuLoop() s_gpu_mainloop.Run( [] { + const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter; + g_video_backend->PeekMessages(); // Do nothing while paused @@ -310,63 +315,59 @@ void RunGpuLoop() CommandProcessor::SetCPStatusFromGPU(); - if (!fifo.isGpuReadingData) - { - CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin); - } - - bool run_loop = true; - // check if we are able to run this buffer - while (run_loop && !CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) + while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { - fifo.isGpuReadingData = true; + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance) + break; - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin) - { - u32 cyclesExecuted = 0; - u32 readPtr = fifo.CPReadPointer; - ReadDataFromFifo(readPtr); + u32 cyclesExecuted = 0; + u32 readPtr = fifo.CPReadPointer; + ReadDataFromFifo(readPtr); - if (readPtr == fifo.CPEnd) - readPtr = fifo.CPBase; - else - readPtr += 32; - - _assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 , - "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. 
Please report it.", fifo.CPReadWriteDistance - 32); - - - u8* write_ptr = s_video_buffer_write_ptr; - s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); - - - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && CommandProcessor::GetVITicks() >= cyclesExecuted) - CommandProcessor::DecrementVITicks(cyclesExecuted); - - Common::AtomicStore(fifo.CPReadPointer, readPtr); - Common::AtomicAdd(fifo.CPReadWriteDistance, -32); - if ((write_ptr - s_video_buffer_read_ptr) == 0) - Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); - } + if (readPtr == fifo.CPEnd) + readPtr = fifo.CPBase; else - { - run_loop = false; - } + readPtr += 32; + + _assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 , + "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32); + + u8* write_ptr = s_video_buffer_write_ptr; + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); + + Common::AtomicStore(fifo.CPReadPointer, readPtr); + Common::AtomicAdd(fifo.CPReadWriteDistance, -32); + if ((write_ptr - s_video_buffer_read_ptr) == 0) + Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); CommandProcessor::SetCPStatusFromGPU(); + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) + { + cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock); + int old = s_sync_ticks.fetch_sub(cyclesExecuted); + if (old > 0 && old - (int)cyclesExecuted <= 0) + s_sync_wakeup_event.Set(); + } + // This call is pretty important in DualCore mode and must be called in the FIFO Loop. // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. 
AsyncRequests::GetInstance()->PullEvents(); } + + // fast skip remaining GPU time if fifo is empty + if (s_sync_ticks.load() > 0) + { + int old = s_sync_ticks.exchange(0); + if (old > 0) + s_sync_wakeup_event.Set(); + } + // The fifo is empty and it's unlikely we will get any more work in the near future. // Make sure VertexManager finishes drawing any primitives it has stored in it's buffer. VertexManager::Flush(); - - // don't release the GPU running state on sync GPU waits - fifo.isGpuReadingData = !run_loop; } }, 100); @@ -376,7 +377,9 @@ void RunGpuLoop() void FlushGpu() { - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) + const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter; + + if (!param.bCPUThread || g_use_deterministic_gpu_thread) return; s_gpu_mainloop.Wait(); @@ -396,9 +399,10 @@ bool AtBreakpoint() void RunGpu() { SCPFifoStruct &fifo = CommandProcessor::fifo; + const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter; // execute GPU - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) + if (!param.bCPUThread || g_use_deterministic_gpu_thread) { bool reset_simd_state = false; while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) @@ -438,7 +442,7 @@ void RunGpu() } // wake up GPU thread - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) + if (param.bCPUThread) { s_gpu_mainloop.Wakeup(); } @@ -471,7 +475,7 @@ void Fifo_UpdateWantDeterminism(bool want) break; } - gpu_thread = gpu_thread && SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread; + gpu_thread = gpu_thread && param.bCPUThread; if (g_use_deterministic_gpu_thread != gpu_thread) { @@ -485,3 +489,40 @@ void Fifo_UpdateWantDeterminism(bool want) } } } + +int Fifo_Update(int ticks) +{ + const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter; + + if 
(ticks == 0) + { + FlushGpu(); + return param.iSyncGpuMaxDistance; + } + + // GPU is sleeping, so no need for synchronization + if (s_gpu_mainloop.IsDone() || g_use_deterministic_gpu_thread) + { + if (s_sync_ticks.load() < 0) + { + int old = s_sync_ticks.fetch_add(ticks); + if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance) + RunGpu(); + } + return param.iSyncGpuMaxDistance; + } + + int old = s_sync_ticks.fetch_add(ticks); + if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance) + RunGpu(); + + if (s_sync_ticks.load() >= param.iSyncGpuMaxDistance) + { + while (s_sync_ticks.load() > 0) + { + s_sync_wakeup_event.Wait(); + } + } + + return param.iSyncGpuMaxDistance - s_sync_ticks.load(); +} diff --git a/Source/Core/VideoCommon/Fifo.h b/Source/Core/VideoCommon/Fifo.h index 8a8a954fe0..57980b24d1 100644 --- a/Source/Core/VideoCommon/Fifo.h +++ b/Source/Core/VideoCommon/Fifo.h @@ -50,3 +50,4 @@ void EmulatorState(bool running); bool AtBreakpoint(); void ResetVideoBuffer(); void Fifo_SetRendering(bool bEnabled); +int Fifo_Update(int ticks); diff --git a/Source/Core/VideoCommon/MainBase.cpp b/Source/Core/VideoCommon/MainBase.cpp index 96aa34de90..c92e7d5dab 100644 --- a/Source/Core/VideoCommon/MainBase.cpp +++ b/Source/Core/VideoCommon/MainBase.cpp @@ -245,9 +245,9 @@ void VideoBackendHardware::Video_GatherPipeBursted() CommandProcessor::GatherPipeBursted(); } -void VideoBackendHardware::Video_Sync() +int VideoBackendHardware::Video_Sync(int ticks) { - FlushGpu(); + return Fifo_Update(ticks); } void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) diff --git a/Source/Core/VideoCommon/VideoBackendBase.h b/Source/Core/VideoCommon/VideoBackendBase.h index 74a6e154c4..4671afdc6b 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.h +++ b/Source/Core/VideoCommon/VideoBackendBase.h @@ -56,9 +56,6 @@ struct SCPFifoStruct volatile u32 bFF_LoWatermark; volatile u32 bFF_HiWatermark; - - // for GP watchdog 
hack - volatile u32 isGpuReadingData; }; class VideoBackend @@ -99,7 +96,7 @@ public: virtual void Video_GatherPipeBursted() = 0; - virtual void Video_Sync() = 0; + virtual int Video_Sync(int ticks) = 0; // Registers MMIO handlers for the CommandProcessor registers. virtual void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) = 0; @@ -148,7 +145,7 @@ class VideoBackendHardware : public VideoBackend void Video_GatherPipeBursted() override; - void Video_Sync() override; + int Video_Sync(int ticks) override; void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;