Merge pull request #2470 from degasus/syncgpu

Common: Blocking Loop (extracted from Fifo.cpp)
This commit is contained in:
comex 2015-06-02 20:19:00 -04:00
commit 0c5aa54606
12 changed files with 399 additions and 71 deletions

View File

@ -0,0 +1,214 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <mutex>
#include <thread>
#include "Common/Event.h"
#include "Common/Flag.h"
namespace Common
{

// A synchronized worker loop.
//
// One thread executes Run(), which repeatedly invokes a payload callback. Other
// threads call Wakeup() to request (at least) one more payload run and Wait() to
// block until the payload has completed a run after the last Wakeup().
//
// The implementation avoids signalling events in the common case where the worker
// is already running: Wakeup() then only touches an atomic state variable.
//
// Be careful when using Wait() and Wakeup() at the same time from different
// threads: Wait() may block forever while Wakeup() is called regularly.
class BlockingLoop
{
public:
	// Constructs the loop in the stopped state, so Wait() returns immediately
	// until Prepare() or Run() is called.
	//
	// m_running_state is explicitly initialized: a default-constructed
	// std::atomic<int> holds an indeterminate value, and Wakeup() reads the
	// state without checking m_stopped first. STATE_DONE is the same value
	// Run() leaves behind when it shuts down.
	BlockingLoop()
		: m_running_state(STATE_DONE)
	{
		m_stopped.Set();
	}

	// Requests a (blocking) stop of the loop if it is not already stopped.
	~BlockingLoop()
	{
		Stop();
	}

	// Triggers to rerun the payload of the Run() function at least once again.
	// This function will never block and is designed to finish as fast as possible.
	void Wakeup()
	{
		// Already marked as needing execution, so no need for a wakeup.
		// This is the common case, so try to get this as fast as possible.
		if (m_running_state.load() >= STATE_NEED_EXECUTION)
			return;

		// Mark that new data is available. If the old state will rerun the payload
		// itself, we don't have to set the event to interrupt the worker.
		if (m_running_state.exchange(STATE_NEED_EXECUTION) != STATE_SLEEPING)
			return;

		// Else as the worker thread may sleep now, we have to set the event.
		m_new_work_event.Set();
	}

	// Waits for a complete payload run after the last Wakeup() call.
	// If stopped, this returns immediately.
	void Wait()
	{
		// Already done (or not running at all).
		if (m_stopped.IsSet() || m_running_state.load() <= STATE_DONE)
			return;

		// Notifying the done event will only wake up one thread, so use a mutex
		// here to allow only one waiting thread at a time. Any further waiter
		// re-checks the state once it obtains the mutex, so it usually does not
		// need an event of its own.
		std::lock_guard<std::mutex> lk(m_wait_lock);

		// Wait for the worker thread to finish.
		while (!m_stopped.IsSet() && m_running_state.load() > STATE_DONE)
		{
			m_done_event.Wait();
		}

		// As we wanted to wait for the other thread, there is likely no work remaining.
		// So there is no need for a busy loop any more.
		m_may_sleep.Set();
	}

	// Half starts the worker.
	// Afterwards this object is in a running state and Wait() will block until the
	// worker calls Run(). This may be called from any thread and is supposed to be
	// called at least once before Wait() is used.
	// Calling it while the loop is not stopped is a no-op.
	void Prepare()
	{
		// There is a race condition if the other threads call this function while
		// the loop thread is initializing. Using this lock will ensure a valid state.
		std::lock_guard<std::mutex> lk(m_prepare_lock);

		if (!m_stopped.TestAndClear())
			return;

		// So the payload will only be executed once without any Wakeup() call.
		m_running_state.store(STATE_LAST_EXECUTION);
		m_shutdown.Clear();
		m_may_sleep.Set();
	}

	// Main loop of this object; returns once a shutdown was requested via Stop().
	// The payload callback is called at least as often as it's needed to match the
	// Wakeup() requirements.
	// The optional timeout parameter (in milliseconds) bounds how long the worker
	// sleeps before it calls the payload again on its own.
	// Use timeout = 0 to run without a timeout at all.
	template<class F> void Run(F payload, int64_t timeout = 0)
	{
		// Makes sure Prepare() was called at least once before we enter the loop.
		// But a good implementation should already have called it before.
		Prepare();

		while (!m_shutdown.IsSet())
		{
			payload();

			switch (m_running_state.load())
			{
			case STATE_NEED_EXECUTION:
				// We won't get notified while we are in the STATE_NEED_EXECUTION state, so maybe Wakeup was called.
				// So we have to assume on finishing the STATE_NEED_EXECUTION state, that there may be some remaining tasks.
				// To process these tasks, we call the payload again within the STATE_LAST_EXECUTION state.
				m_running_state--;
				break;

			case STATE_LAST_EXECUTION:
				// If we're still in the STATE_LAST_EXECUTION state, then Wakeup wasn't called within the last
				// execution of the payload. This means we should be ready now.
				// But bad luck, Wakeup might have been called right now. So break and rerun the payload
				// if the state was touched right now.
				if (m_running_state-- != STATE_LAST_EXECUTION)
					break;

				// Else we're likely in the STATE_DONE state now, so wake up the waiting threads right now.
				// However, if we're not in the STATE_DONE state any more, the event should also be
				// triggered so that we'll skip the next waiting call quite fast.
				m_done_event.Set();
				// Intentional fall through into STATE_DONE.

			case STATE_DONE:
				// We're done now. So time to check if we want to sleep or if we want to stay in a busy loop.
				if (m_may_sleep.TestAndClear())
				{
					// Try to set the sleeping state.
					if (m_running_state-- != STATE_DONE)
						break;
				}
				else
				{
					// Busy loop.
					break;
				}
				// Intentional fall through into STATE_SLEEPING.

			case STATE_SLEEPING:
				// Just relax until Wakeup() signals new work (or the timeout expires).
				if (timeout > 0)
				{
					m_new_work_event.WaitFor(std::chrono::milliseconds(timeout));
				}
				else
				{
					m_new_work_event.Wait();
				}
				break;
			}
		}

		// Shutting down, so get into a safe state.
		m_running_state.store(STATE_DONE);
		m_stopped.Set();

		// Wake up the last Wait() calls.
		m_done_event.Set();
	}

	// Quits the main loop.
	// By default, it will wait until the main loop quits.
	// Be careful to not use the blocking way within the payload of the Run() method.
	void Stop(bool block = true)
	{
		if (m_stopped.IsSet())
			return;

		m_shutdown.Set();

		// We have to interrupt the sleeping call to let the worker shut down soon.
		Wakeup();

		if (block)
			Wait();
	}

	// Returns true while the loop is neither stopped nor asked to shut down.
	bool IsRunning() const
	{
		return !m_stopped.IsSet() && !m_shutdown.IsSet();
	}

	// This function should be triggered regularly over time, so that the worker
	// falls back from the busy loop to the sleeping way.
	void AllowSleep()
	{
		m_may_sleep.Set();
	}

private:
	std::mutex m_wait_lock;    // Serializes the threads blocking in Wait().
	std::mutex m_prepare_lock; // Serializes Prepare() calls.

	Flag m_stopped;  // If this one is set, Wait() shall not block.
	Flag m_shutdown; // If this one is set, the loop shall be quit.

	Event m_new_work_event; // Set by Wakeup() to interrupt a sleeping worker.
	Event m_done_event;     // Set by the worker to release a thread blocked in Wait().

	// The numeric order matters: Wakeup() and Wait() compare states with >= and <=,
	// and Run() steps down through the states by decrementing.
	enum RUNNING_TYPE {
		STATE_SLEEPING = 0,
		STATE_DONE = 1,
		STATE_LAST_EXECUTION = 2,
		STATE_NEED_EXECUTION = 3
	};
	std::atomic<int> m_running_state; // must be of type RUNNING_TYPE

	Flag m_may_sleep; // If this one is set, we fall back from the busy loop to an event based synchronization.
};

}

View File

@ -40,6 +40,7 @@
<ClInclude Include="Atomic_Win32.h" />
<ClInclude Include="BitField.h" />
<ClInclude Include="BitSet.h" />
<ClInclude Include="BlockingLoop.h" />
<ClInclude Include="BreakPoints.h" />
<ClInclude Include="CDUtils.h" />
<ClInclude Include="ChunkFile.h" />

View File

@ -14,6 +14,7 @@
<ClInclude Include="Atomic_Win32.h" />
<ClInclude Include="BitField.h" />
<ClInclude Include="BitSet.h" />
<ClInclude Include="BlockingLoop.h" />
<ClInclude Include="BreakPoints.h" />
<ClInclude Include="CDUtils.h" />
<ClInclude Include="ChunkFile.h" />

View File

@ -481,7 +481,7 @@ void Idle()
{
//DEBUG_LOG(POWERPC, "Idle");
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack)
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack && !SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
{
//When the FIFO is processing data we must not advance because in this way
//the VI will be desynchronized. So, We are waiting until the FIFO finish and

View File

@ -62,6 +62,7 @@ IPC_HLE_PERIOD: For the Wiimote this is the call schedule:
#include "Core/PowerPC/PowerPC.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/VideoBackendBase.h"
@ -189,7 +190,7 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate)
static void ThrottleCallback(u64 last_time, int cyclesLate)
{
// Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz.
CommandProcessor::s_gpuMaySleep.Set();
GpuMaySleep();
u32 time = Common::Timer::GetTimeMs();

View File

@ -49,8 +49,6 @@ static std::atomic<bool> s_interrupt_finish_waiting;
static std::atomic<u32> s_vi_ticks(CommandProcessor::m_cpClockOrigin);
Common::Flag s_gpuMaySleep;
static bool IsOnThread()
{
return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;

View File

@ -17,7 +17,6 @@ namespace CommandProcessor
{
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern Common::Flag s_gpuMaySleep;
// internal hardware addresses
enum

View File

@ -5,6 +5,7 @@
#include <atomic>
#include "Common/Atomic.h"
#include "Common/BlockingLoop.h"
#include "Common/ChunkFile.h"
#include "Common/CPUDetect.h"
#include "Common/Event.h"
@ -26,11 +27,13 @@
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoConfig.h"
bool g_bSkipCurrentFrame = false;
static std::atomic<bool> s_gpu_running_state;
static Common::BlockingLoop s_gpu_mainloop;
static std::atomic<bool> s_emu_running_state;
// Most of this array is unlikely to be faulted in...
@ -41,8 +44,6 @@ static u8* s_fifo_aux_read_ptr;
bool g_use_deterministic_gpu_thread;
// STATE_TO_SAVE
static std::mutex s_video_buffer_lock;
static std::condition_variable s_video_buffer_cond;
static u8* s_video_buffer;
static u8* s_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr;
@ -60,12 +61,6 @@ static u8* s_video_buffer_pp_read_ptr;
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again
static Common::Event s_gpu_new_work_event;
static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do
static Common::Event s_gpu_done_event;
void Fifo_DoState(PointerWrap &p)
{
p.DoArray(s_video_buffer, FIFO_SIZE);
@ -102,13 +97,14 @@ void Fifo_Init()
// Padded so that SIMD overreads in the vertex loader are safe
s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4);
ResetVideoBuffer();
s_gpu_running_state.store(false);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
s_gpu_mainloop.Prepare();
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
}
void Fifo_Shutdown()
{
if (s_gpu_running_state.load())
if (s_gpu_mainloop.IsRunning())
PanicAlert("Fifo shutting down while active");
FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);
@ -135,27 +131,22 @@ void ExitGpuLoop()
FlushGpu();
// Terminate GPU thread loop
s_gpu_running_state.store(false);
s_emu_running_state.store(true);
s_gpu_new_work_event.Set();
s_gpu_mainloop.Stop(false);
}
// Stores whether the emulator is currently running in s_emu_running_state and then
// signals the GPU side so that the changed state gets noticed.
// NOTE(review): this span comes from a diff rendered without +/- markers; the
// s_gpu_new_work_event.Set() line looks like the pre-change line that the
// s_gpu_mainloop.Wakeup() call replaces - confirm against the real file.
void EmulatorState(bool running)
{
s_emu_running_state.store(running);
s_gpu_new_work_event.Set();
s_gpu_mainloop.Wakeup();
}
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
if (g_use_deterministic_gpu_thread)
{
std::unique_lock<std::mutex> lk(s_video_buffer_lock);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_cond.wait(lk, [&]() {
return !s_gpu_running_state.load() || s_video_buffer_seen_ptr == write_ptr;
});
if (!s_gpu_running_state.load())
s_gpu_mainloop.Wait();
if (!s_gpu_mainloop.IsRunning())
return;
// Opportunistically reset FIFOs so we don't wrap around.
@ -168,6 +159,8 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
if (may_move_read_ptr)
{
u8* write_ptr = s_video_buffer_write_ptr;
// what's left over in the buffer
size_t size = write_ptr - s_video_buffer_pp_read_ptr;
@ -188,7 +181,7 @@ void PushFifoAuxBuffer(void* ptr, size_t size)
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
{
SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
if (!s_gpu_running_state.load())
if (!s_gpu_mainloop.IsRunning())
{
// GPU is shutting down
return;
@ -243,9 +236,9 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
// We can't wrap around while the GPU is working on the data.
// This should be very rare due to the reset in SyncGPU.
SyncGPU(SYNC_GPU_WRAPAROUND);
if (!s_gpu_running_state.load())
if (!s_gpu_mainloop.IsRunning())
{
// GPU is shutting down
// GPU is shutting down, so the next asserts may fail
return;
}
@ -283,18 +276,19 @@ void ResetVideoBuffer()
// Purpose: Keep the Core HW updated about the CPU-GPU distance
void RunGpuLoop()
{
s_gpu_running_state.store(true);
SCPFifoStruct &fifo = CommandProcessor::fifo;
u32 cyclesExecuted = 0;
AsyncRequests::GetInstance()->SetEnable(true);
AsyncRequests::GetInstance()->SetPassthrough(false);
while (s_gpu_running_state.load())
{
s_gpu_mainloop.Run(
[] {
g_video_backend->PeekMessages();
if (g_use_deterministic_gpu_thread && s_emu_running_state.load())
// Do nothing while paused
if (!s_emu_running_state.load())
return;
if (g_use_deterministic_gpu_thread)
{
AsyncRequests::GetInstance()->PullEvents();
@ -305,16 +299,13 @@ void RunGpuLoop()
if (write_ptr > seen_ptr)
{
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
{
std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
s_video_buffer_seen_ptr = write_ptr;
s_video_buffer_cond.notify_all();
}
s_video_buffer_seen_ptr = write_ptr;
}
}
else if (s_emu_running_state.load())
else
{
SCPFifoStruct &fifo = CommandProcessor::fifo;
AsyncRequests::GetInstance()->PullEvents();
CommandProcessor::SetCPStatusFromGPU();
@ -333,6 +324,7 @@ void RunGpuLoop()
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin)
{
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
@ -369,31 +361,15 @@ void RunGpuLoop()
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
AsyncRequests::GetInstance()->PullEvents();
}
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
VertexManager::Flush();
// don't release the GPU running state on sync GPU waits
fifo.isGpuReadingData = !run_loop;
}
}, 100);
s_gpu_is_pending.Clear();
s_gpu_done_event.Set();
if (s_gpu_is_running.IsSet())
{
if (CommandProcessor::s_gpuMaySleep.IsSet())
{
// Reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop
s_gpu_is_pending.Set();
s_gpu_is_running.Clear();
CommandProcessor::s_gpuMaySleep.Clear();
}
}
else
{
s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100));
}
}
// wake up SyncGPU if we were interrupted
s_video_buffer_cond.notify_all();
AsyncRequests::GetInstance()->SetEnable(false);
AsyncRequests::GetInstance()->SetPassthrough(true);
}
@ -403,11 +379,12 @@ void FlushGpu()
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
return;
while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet())
{
CommandProcessor::s_gpuMaySleep.Set();
s_gpu_done_event.Wait();
}
s_gpu_mainloop.Wait();
}
// Forwards to s_gpu_mainloop.AllowSleep(), which permits the GPU main loop to fall
// back from its busy loop to event based waiting.
void GpuMaySleep()
{
s_gpu_mainloop.AllowSleep();
}
bool AtBreakpoint()
@ -429,6 +406,7 @@ void RunGpu()
if (g_use_deterministic_gpu_thread)
{
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
s_gpu_mainloop.Wakeup();
}
else
{
@ -460,11 +438,9 @@ void RunGpu()
}
// wake up GPU thread
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet())
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
{
s_gpu_is_pending.Set();
s_gpu_is_running.Set();
s_gpu_new_work_event.Set();
s_gpu_mainloop.Wakeup();
}
}

View File

@ -43,6 +43,7 @@ void* PopFifoAuxBuffer(size_t size);
void FlushGpu();
void RunGpu();
void GpuMaySleep();
void RunGpuLoop();
void ExitGpuLoop();
void EmulatorState(bool running);

View File

@ -0,0 +1,84 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <atomic>
#include <thread>
#include <gtest/gtest.h>
#include "Common/BlockingLoop.h"
// Stress test: two producer threads bump their own counter and wake the loop,
// the loop payload mirrors both counters, and after each Wait() a producer must
// observe its own counter mirrored.
TEST(BlockingLoop, MultiThreaded)
{
	Common::BlockingLoop loop;

	std::atomic<int> signaled_a(0);
	std::atomic<int> received_a(0);
	std::atomic<int> signaled_b(0);
	std::atomic<int> received_b(0);

	// Payload of the loop: copy the "signaled" counters into the "received" ones.
	const auto mirror_counters = [&]() {
		received_a.store(signaled_a.load());
		received_b.store(signaled_b.load());
	};

	for (int round = 0; round != 100; ++round)
	{
		// Make the mirrored values deliberately stale.
		received_a.store(signaled_a.load() + 1);
		received_b.store(signaled_b.load() + 123);

		// The loop is stopped here, so this must return immediately.
		loop.Wait();

		std::thread loop_thread([&]() { loop.Run(mirror_counters); });

		// From now on Wait() has to block.
		loop.Prepare();

		// Startup must execute the payload at least once.
		loop.Wait();
		EXPECT_EQ(signaled_a.load(), received_a.load());
		EXPECT_EQ(signaled_b.load(), received_b.load());

		std::thread run_a_thread([&]() {
			for (int batch = 0; batch != 100; ++batch)
			{
				for (int ping = 0; ping != 100; ++ping)
				{
					signaled_a.fetch_add(1);
					loop.Wakeup();
				}

				loop.Wait();
				EXPECT_EQ(signaled_a.load(), received_a.load());
			}
		});

		std::thread run_b_thread([&]() {
			for (int batch = 0; batch != 100; ++batch)
			{
				for (int ping = 0; ping != 100; ++ping)
				{
					signaled_b.fetch_add(1);
					loop.Wakeup();
				}

				loop.Wait();
				EXPECT_EQ(signaled_b.load(), received_b.load());
			}
		});

		run_a_thread.join();
		run_b_thread.join();

		loop.Stop();

		// Stopped again, so this must not block.
		loop.Wait();

		loop_thread.join();
	}
}

View File

@ -0,0 +1,51 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <atomic>
#include <thread>
#include <gtest/gtest.h>
#include "Common/BlockingLoop.h"
#include "Common/Thread.h"
// Checks that the loop keeps calling its payload on its own (busy loop) once it
// has been running, i.e. without any further Wakeup() calls.
TEST(BusyLoopTest, MultiThreaded)
{
	Common::BlockingLoop loop;
	Common::Event payload_ran;

	// Payload of the loop: just report that it was executed.
	const auto notify = [&]() { payload_ran.Set(); };

	for (int round = 0; round != 100; ++round)
	{
		loop.Prepare();
		std::thread loop_thread([&]() { loop.Run(notify); });

		// Ping - pong: each wakeup has to be answered by one payload run.
		for (int ping = 0; ping != 10; ++ping)
		{
			loop.Wakeup();
			payload_ran.Wait();

			// Burn a little time to make it much more likely that the main loop
			// has dropped back to the sleep state.
			Common::SleepCurrentThread(1);
		}

		for (int pong = 0; pong != 100; ++pong)
		{
			// Normally Wakeup() would be required here to guarantee that the event
			// gets triggered. This check however targets an internal feature of the
			// BlockingLoop: it regularly falls back to a busy loop, and while it is
			// in the busy loop the payload (and thereby the event) fires all the time.
			//loop.Wakeup();
			payload_ran.Wait();
		}

		loop.Stop();
		loop_thread.join();
	}
}

View File

@ -1,5 +1,7 @@
add_dolphin_test(BitFieldTest BitFieldTest.cpp)
add_dolphin_test(BitSetTest BitSetTest.cpp)
add_dolphin_test(BlockingLoopTest BlockingLoopTest.cpp)
add_dolphin_test(BusyLoopTest BusyLoopTest.cpp)
add_dolphin_test(CommonFuncsTest CommonFuncsTest.cpp)
add_dolphin_test(EventTest EventTest.cpp)
add_dolphin_test(FifoQueueTest FifoQueueTest.cpp)