Merge pull request #7517 from delroth/analytics-perf

Report performance information through our analytics subsystem
This commit is contained in:
Pierre Bourdon 2018-10-28 00:43:12 +02:00 committed by GitHub
commit 756a2fe14d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 265 additions and 69 deletions

View File

@ -5,6 +5,7 @@
#include <cmath>
#include <cstdio>
#include <string>
#include <type_traits>
#include "Common/Analytics.h"
#include "Common/CommonTypes.h"
@ -17,7 +18,7 @@ namespace
{
// Format version number, used as the first byte of every report sent.
// Increment for any change to the wire format.
constexpr u8 WIRE_FORMAT_VERSION = 0;
constexpr u8 WIRE_FORMAT_VERSION = 1;
// Identifiers for the value types supported by the analytics reporting wire
// format.
@ -28,8 +29,17 @@ enum class TypeId : u8
UINT = 2,
SINT = 3,
FLOAT = 4,
// Flags which can be combined with other types.
ARRAY = 0x80,
};
TypeId operator|(TypeId l, TypeId r)
{
using ut = std::underlying_type_t<TypeId>;
return static_cast<TypeId>(static_cast<ut>(l) | static_cast<ut>(r));
}
void AppendBool(std::string* out, bool v)
{
out->push_back(v ? '\xFF' : '\x00');
@ -112,6 +122,15 @@ void AnalyticsReportBuilder::AppendSerializedValue(std::string* report, float v)
AppendBytes(report, reinterpret_cast<u8*>(&v), sizeof(v), false);
}
void AnalyticsReportBuilder::AppendSerializedValueVector(std::string* report,
const std::vector<u32>& v)
{
AppendType(report, TypeId::UINT | TypeId::ARRAY);
AppendVarInt(report, v.size());
for (u32 x : v)
AppendVarInt(report, x);
}
AnalyticsReporter::AnalyticsReporter()
{
m_reporter_thread = std::thread(&AnalyticsReporter::ThreadProc, this);

View File

@ -95,6 +95,15 @@ public:
return *this;
}
template <typename T>
AnalyticsReportBuilder& AddData(const std::string& key, const std::vector<T>& value)
{
std::lock_guard<std::mutex> lk(m_lock);
AppendSerializedValue(&m_report, key);
AppendSerializedValueVector(&m_report, value);
return *this;
}
std::string Get() const
{
std::lock_guard<std::mutex> lk(m_lock);
@ -118,6 +127,8 @@ protected:
static void AppendSerializedValue(std::string* report, s32 v);
static void AppendSerializedValue(std::string* report, float v);
static void AppendSerializedValueVector(std::string* report, const std::vector<u32>& v);
// Should really be a std::shared_mutex, unfortunately that's C++17 only.
mutable std::mutex m_lock;
std::string m_report;

View File

@ -5,6 +5,7 @@
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#if defined(_WIN32)
#include <windows.h>
@ -20,6 +21,7 @@
#include "Common/CommonTypes.h"
#include "Common/Random.h"
#include "Common/StringUtil.h"
#include "Common/Timer.h"
#include "Common/Version.h"
#include "Core/ConfigManager.h"
#include "Core/HW/GCPad.h"
@ -128,6 +130,71 @@ void DolphinAnalytics::ReportGameStart()
Common::AnalyticsReportBuilder builder(m_per_game_builder);
builder.AddData("type", "game-start");
Send(builder);
InitializePerformanceSampling();
}
void DolphinAnalytics::ReportPerformanceInfo(PerformanceSample&& sample)
{
if (ShouldStartPerformanceSampling())
{
m_sampling_performance_info = true;
}
if (m_sampling_performance_info)
{
m_performance_samples.emplace_back(std::move(sample));
}
if (m_performance_samples.size() >= NUM_PERFORMANCE_SAMPLES_PER_REPORT)
{
std::vector<u32> speed_times_1000(m_performance_samples.size());
std::vector<u32> num_prims(m_performance_samples.size());
std::vector<u32> num_draw_calls(m_performance_samples.size());
for (size_t i = 0; i < m_performance_samples.size(); ++i)
{
speed_times_1000[i] = static_cast<u32>(m_performance_samples[i].speed_ratio * 1000);
num_prims[i] = m_performance_samples[i].num_prims;
num_draw_calls[i] = m_performance_samples[i].num_draw_calls;
}
// The per game builder should already exist -- there is no way we can be reporting performance
// info without a game start event having been generated.
Common::AnalyticsReportBuilder builder(m_per_game_builder);
builder.AddData("type", "performance");
builder.AddData("speed", speed_times_1000);
builder.AddData("prims", num_prims);
builder.AddData("draw-calls", num_draw_calls);
Send(builder);
// Clear up and stop sampling until next time ShouldStartPerformanceSampling() says so.
m_performance_samples.clear();
m_sampling_performance_info = false;
}
}
void DolphinAnalytics::InitializePerformanceSampling()
{
m_performance_samples.clear();
m_sampling_performance_info = false;
u64 wait_us =
PERFORMANCE_SAMPLING_INITIAL_WAIT_TIME_SECS * 1000000 +
Common::Random::GenerateValue<u64>() % (PERFORMANCE_SAMPLING_WAIT_TIME_JITTER_SECS * 1000000);
m_sampling_next_start_us = Common::Timer::GetTimeUs() + wait_us;
}
bool DolphinAnalytics::ShouldStartPerformanceSampling()
{
if (Common::Timer::GetTimeUs() < m_sampling_next_start_us)
return false;
u64 wait_us =
PERFORMANCE_SAMPLING_INTERVAL_SECS * 1000000 +
Common::Random::GenerateValue<u64>() % (PERFORMANCE_SAMPLING_WAIT_TIME_JITTER_SECS * 1000000);
m_sampling_next_start_us = Common::Timer::GetTimeUs() + wait_us;
return true;
}
void DolphinAnalytics::MakeBaseBuilder()

View File

@ -7,8 +7,10 @@
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include "Common/Analytics.h"
#include "Common/CommonTypes.h"
#if defined(ANDROID)
#include <functional>
@ -40,6 +42,18 @@ public:
// per-game base data.
void ReportGameStart();
struct PerformanceSample
{
double speed_ratio; // See SystemTimers::GetEstimatedEmulationPerformance().
int num_prims;
int num_draw_calls;
};
// Reports performance information. This method performs its own throttling / aggregation --
// calling it does not guarantee when a report will actually be sent.
//
// This method is NOT thread-safe.
void ReportPerformanceInfo(PerformanceSample&& sample);
// Forward Send method calls to the reporter.
template <typename T>
void Send(T report)
@ -63,6 +77,22 @@ private:
// values created by MakeUniqueId.
std::string m_unique_id;
// Performance sampling configuration constants.
//
// 5min after startup + rand(0, 3min) jitter time, collect performance for 100 frames in a row.
// Repeat collection after 30min + rand(0, 3min).
static constexpr int NUM_PERFORMANCE_SAMPLES_PER_REPORT = 100;
static constexpr int PERFORMANCE_SAMPLING_INITIAL_WAIT_TIME_SECS = 300;
static constexpr int PERFORMANCE_SAMPLING_WAIT_TIME_JITTER_SECS = 180;
static constexpr int PERFORMANCE_SAMPLING_INTERVAL_SECS = 1800;
// Performance sampling state & internal helpers.
void InitializePerformanceSampling(); // Called on game start / title switch.
bool ShouldStartPerformanceSampling();
u64 m_sampling_next_start_us; // Next timestamp (in us) at which to trigger sampling.
bool m_sampling_performance_info = false; // Whether we are currently collecting samples.
std::vector<PerformanceSample> m_performance_samples;
// Builder that contains all non variable data that should be sent with all
// reports.
Common::AnalyticsReportBuilder m_base_builder;

View File

@ -45,6 +45,7 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule:
#include "Core/HW/SystemTimers.h"
#include <cfloat>
#include <cmath>
#include <cstdlib>
@ -68,36 +69,43 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule:
namespace SystemTimers
{
static CoreTiming::EventType* et_Dec;
static CoreTiming::EventType* et_VI;
static CoreTiming::EventType* et_AudioDMA;
static CoreTiming::EventType* et_DSP;
static CoreTiming::EventType* et_IPC_HLE;
namespace
{
CoreTiming::EventType* et_Dec;
CoreTiming::EventType* et_VI;
CoreTiming::EventType* et_AudioDMA;
CoreTiming::EventType* et_DSP;
CoreTiming::EventType* et_IPC_HLE;
// PatchEngine updates every 1/60th of a second by default
static CoreTiming::EventType* et_PatchEngine;
static CoreTiming::EventType* et_Throttle;
CoreTiming::EventType* et_PatchEngine;
CoreTiming::EventType* et_Throttle;
static u32 s_cpu_core_clock = 486000000u; // 486 mhz (its not 485, stop bugging me!)
u32 s_cpu_core_clock = 486000000u; // 486 mhz (its not 485, stop bugging me!)
// These two are badly educated guesses.
// Feel free to experiment. Set them in Init below.
// This is a fixed value, don't change it
static int s_audio_dma_period;
int s_audio_dma_period;
// This is completely arbitrary. If we find that we need lower latency,
// we can just increase this number.
static int s_ipc_hle_period;
int s_ipc_hle_period;
// Custom RTC
static s64 s_localtime_rtc_offset = 0;
s64 s_localtime_rtc_offset = 0;
u32 GetTicksPerSecond()
{
return s_cpu_core_clock;
}
// For each emulated milliseconds, what was the real time timestamp (excluding sleep time). This is
// a "special" ring buffer where we only need to read the first and last value.
std::array<u64, 1000> s_emu_to_real_time_ring_buffer;
size_t s_emu_to_real_time_index;
std::mutex s_emu_to_real_time_mutex;
// How much time was spent sleeping since the emulator started. Note: this does not need to be reset
// at initialization (or ever), since only the "derivative" of that value really matters.
u64 s_time_spent_sleeping;
// DSP/CPU timeslicing.
static void DSPCallback(u64 userdata, s64 cyclesLate)
void DSPCallback(u64 userdata, s64 cyclesLate)
{
// splits up the cycle budget in case lle is used
// for hle, just gives all of the slice to hle
@ -105,14 +113,14 @@ static void DSPCallback(u64 userdata, s64 cyclesLate)
CoreTiming::ScheduleEvent(DSP::GetDSPEmulator()->DSP_UpdateRate() - cyclesLate, et_DSP);
}
static void AudioDMACallback(u64 userdata, s64 cyclesLate)
void AudioDMACallback(u64 userdata, s64 cyclesLate)
{
int period = s_cpu_core_clock / (AudioInterface::GetAIDSampleRate() * 4 / 32);
DSP::UpdateAudioDMA(); // Push audio to speakers.
CoreTiming::ScheduleEvent(period - cyclesLate, et_AudioDMA);
}
static void IPC_HLE_UpdateCallback(u64 userdata, s64 cyclesLate)
void IPC_HLE_UpdateCallback(u64 userdata, s64 cyclesLate)
{
if (SConfig::GetInstance().bWii)
{
@ -121,18 +129,88 @@ static void IPC_HLE_UpdateCallback(u64 userdata, s64 cyclesLate)
}
}
static void VICallback(u64 userdata, s64 cyclesLate)
void VICallback(u64 userdata, s64 cyclesLate)
{
VideoInterface::Update(CoreTiming::GetTicks() - cyclesLate);
CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerHalfLine() - cyclesLate, et_VI);
}
static void DecrementerCallback(u64 userdata, s64 cyclesLate)
void DecrementerCallback(u64 userdata, s64 cyclesLate)
{
PowerPC::ppcState.spr[SPR_DEC] = 0xFFFFFFFF;
PowerPC::ppcState.Exceptions |= EXCEPTION_DECREMENTER;
}
void PatchEngineCallback(u64 userdata, s64 cycles_late)
{
// We have 2 periods, a 1000 cycle error period and the VI period.
// We have to carefully combine these together so that we stay on the VI period without drifting.
u32 vi_interval = VideoInterface::GetTicksPerField();
s64 cycles_pruned = (userdata + cycles_late) % vi_interval;
s64 next_schedule = 0;
// Try to patch mem and run the Action Replay
if (PatchEngine::ApplyFramePatches())
{
next_schedule = vi_interval - cycles_pruned;
cycles_pruned = 0;
}
else
{
// The patch failed, usually because the CPU is in an inappropriate state (interrupt handler).
// We'll try again after 1000 cycles.
next_schedule = 1000;
cycles_pruned += next_schedule;
}
CoreTiming::ScheduleEvent(next_schedule, et_PatchEngine, cycles_pruned);
}
void ThrottleCallback(u64 last_time, s64 cyclesLate)
{
// Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz.
Fifo::GpuMaySleep();
u64 time = Common::Timer::GetTimeUs();
s64 diff = last_time - time;
const SConfig& config = SConfig::GetInstance();
bool frame_limiter = config.m_EmulationSpeed > 0.0f && !Core::GetIsThrottlerTempDisabled();
u32 next_event = GetTicksPerSecond() / 1000;
{
std::lock_guard<std::mutex> lk(s_emu_to_real_time_mutex);
s_emu_to_real_time_ring_buffer[s_emu_to_real_time_index] = time - s_time_spent_sleeping;
s_emu_to_real_time_index =
(s_emu_to_real_time_index + 1) % s_emu_to_real_time_ring_buffer.size();
}
if (frame_limiter)
{
if (config.m_EmulationSpeed != 1.0f)
next_event = u32(next_event * config.m_EmulationSpeed);
const s64 max_fallback = config.iTimingVariance * 1000;
if (abs(diff) > max_fallback)
{
DEBUG_LOG(COMMON, "system too %s, %ld ms skipped", diff < 0 ? "slow" : "fast",
abs(diff) - max_fallback);
last_time = time - max_fallback;
}
else if (diff > 1000)
{
Common::SleepCurrentThread(diff / 1000);
s_time_spent_sleeping += Common::Timer::GetTimeUs() - time;
}
}
CoreTiming::ScheduleEvent(next_event - cyclesLate, et_Throttle, last_time + 1000);
}
} // namespace
u32 GetTicksPerSecond()
{
return s_cpu_core_clock;
}
void DecrementerSet()
{
u32 decValue = PowerPC::ppcState.spr[SPR_DEC];
@ -170,57 +248,29 @@ s64 GetLocalTimeRTCOffset()
return s_localtime_rtc_offset;
}
static void PatchEngineCallback(u64 userdata, s64 cycles_late)
double GetEstimatedEmulationPerformance()
{
// We have 2 periods, a 1000 cycle error period and the VI period.
// We have to carefully combine these together so that we stay on the VI period without drifting.
u32 vi_interval = VideoInterface::GetTicksPerField();
s64 cycles_pruned = (userdata + cycles_late) % vi_interval;
s64 next_schedule = 0;
u64 ts_now, ts_before; // In microseconds
{
std::lock_guard<std::mutex> lk(s_emu_to_real_time_mutex);
size_t index_now = s_emu_to_real_time_index == 0 ? s_emu_to_real_time_ring_buffer.size() :
s_emu_to_real_time_index - 1;
size_t index_before = s_emu_to_real_time_index;
// Try to patch mem and run the Action Replay
if (PatchEngine::ApplyFramePatches())
{
next_schedule = vi_interval - cycles_pruned;
cycles_pruned = 0;
}
else
{
// The patch failed, usually because the CPU is in an inappropriate state (interrupt handler).
// We'll try again after 1000 cycles.
next_schedule = 1000;
cycles_pruned += next_schedule;
ts_now = s_emu_to_real_time_ring_buffer[index_now];
ts_before = s_emu_to_real_time_ring_buffer[index_before];
}
CoreTiming::ScheduleEvent(next_schedule, et_PatchEngine, cycles_pruned);
}
static void ThrottleCallback(u64 last_time, s64 cyclesLate)
{
// Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz.
Fifo::GpuMaySleep();
u32 time = Common::Timer::GetTimeMs();
int diff = (u32)last_time - time;
const SConfig& config = SConfig::GetInstance();
bool frame_limiter = config.m_EmulationSpeed > 0.0f && !Core::GetIsThrottlerTempDisabled();
u32 next_event = GetTicksPerSecond() / 1000;
if (frame_limiter)
if (ts_before == 0)
{
if (config.m_EmulationSpeed != 1.0f)
next_event = u32(next_event * config.m_EmulationSpeed);
const int max_fallback = config.iTimingVariance;
if (abs(diff) > max_fallback)
{
DEBUG_LOG(COMMON, "system too %s, %d ms skipped", diff < 0 ? "slow" : "fast",
abs(diff) - max_fallback);
last_time = time - max_fallback;
// Not enough data yet to estimate. We could technically provide an estimate based on a shorter
// time horizon, but it's not really worth it.
return 1.0;
}
else if (diff > 0)
Common::SleepCurrentThread(diff);
}
CoreTiming::ScheduleEvent(next_event - cyclesLate, et_Throttle, last_time + 1);
u64 delta_us = ts_now - ts_before;
double emulated_us = s_emu_to_real_time_ring_buffer.size() * 1000.0; // For each emulated ms.
return delta_us == 0 ? DBL_MAX : emulated_us / delta_us;
}
// split from Init to break a circular dependency between VideoInterface::Init and
@ -282,12 +332,14 @@ void Init()
CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerHalfLine(), et_VI);
CoreTiming::ScheduleEvent(0, et_DSP);
CoreTiming::ScheduleEvent(s_audio_dma_period, et_AudioDMA);
CoreTiming::ScheduleEvent(0, et_Throttle, Common::Timer::GetTimeMs());
CoreTiming::ScheduleEvent(0, et_Throttle, Common::Timer::GetTimeUs());
CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerField(), et_PatchEngine);
if (SConfig::GetInstance().bWii)
CoreTiming::ScheduleEvent(s_ipc_hle_period, et_IPC_HLE);
s_emu_to_real_time_ring_buffer.fill(0);
}
void Shutdown()
@ -296,4 +348,4 @@ void Shutdown()
s_localtime_rtc_offset = 0;
}
} // namespace
} // namespace SystemTimers

View File

@ -53,4 +53,13 @@ void TimeBaseSet();
u64 GetFakeTimeBase();
// Custom RTC
s64 GetLocalTimeRTCOffset();
}
// Returns an estimate of how fast/slow the emulation is running (excluding throttling induced sleep
// time). The estimate is computed over the last 1s of emulated time. Example values:
//
// - 0.5: the emulator is running at 50% speed (falling behind).
// - 1.0: the emulator is running at 100% speed.
// - 2.0: the emulator is running at 200% speed (or 100% speed but sleeping half of the time).
double GetEstimatedEmulationPerformance();
} // namespace SystemTimers

View File

@ -34,10 +34,12 @@
#include "Common/Thread.h"
#include "Common/Timer.h"
#include "Core/Analytics.h"
#include "Core/Config/SYSCONFSettings.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/SystemTimers.h"
#include "Core/HW/VideoInterface.h"
#include "Core/Host.h"
#include "Core/Movie.h"
@ -698,6 +700,12 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
m_fps_counter.Update();
DolphinAnalytics::PerformanceSample perf_sample;
perf_sample.speed_ratio = SystemTimers::GetEstimatedEmulationPerformance();
perf_sample.num_prims = stats.thisFrame.numPrims + stats.thisFrame.numDLPrims;
perf_sample.num_draw_calls = stats.thisFrame.numDrawCalls;
DolphinAnalytics::Instance()->ReportPerformanceInfo(std::move(perf_sample));
if (IsFrameDumping())
DumpCurrentFrame();