From cdce5ace256c2304e995aa149e9f8c2d9650b1ec Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Tue, 28 Oct 2014 01:03:19 +0100 Subject: [PATCH 1/3] Add Linux perf JIT support (/tmp/perf-$pid.map) 'perf' is the standard builtin tool for performance analysis on recent Linux kernels. Its source code is shipped within the kernel repository. 'perf' has basic support for JIT. For each process, it can read a file named /tmp/perf-$PID.map. This file contains a mapping from address ranges to function names in the format: 41187e2a 1a EmuCode_804a33fc with the following entries: 1. beginning of the range (hexadecimal); 2. size of the range (hexadecimal); 3. name of the function. We supply the PowerPC address of the basic block as the function name. Usage: DOLPHIN_PERF_DIR=/tmp dolphin-emu & perf record -F99 -p $(pgrep dolphin-emu) --call-graph dwarf perf script | stackcollapse-perf.pl | grep EmuCode_ | flamegraph.pl > profile.svg Issue: perf does not have support for region invalidation. It reads the file in postprocessing. It probably does not work very well if a JIT region is reused for another basic block: wrong results should be expected in this case. Currently, nothing is done to prevent this. --- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 27 +++++++++++++++++++ Source/Core/Core/PowerPC/JitCommon/JitCache.h | 3 +++ 2 files changed, 30 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index abcdfcd656..0c680dc020 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -9,6 +9,14 @@ // performance hit, it's not enabled by default, but it's useful for // locating performance issues. 
+#include + +#ifdef _WIN32 +#include +#else +#include +#endif + #include "disasm.h" #include "Common/CommonTypes.h" @@ -50,6 +58,15 @@ using namespace Gen; #if defined USE_OPROFILE && USE_OPROFILE agent = op_open_agent(); #endif + + const char* perf_dir = getenv("DOLPHIN_PERF_DIR"); + if (perf_dir && perf_dir[0]) + { + std::string filename = StringFromFormat("%s/perf-%d.map", + perf_dir, getpid()); + m_perf_map_file.open(filename, std::ios::trunc); + } + iCache.fill(JIT_ICACHE_INVALID_BYTE); iCacheEx.fill(JIT_ICACHE_INVALID_BYTE); iCacheVMEM.fill(JIT_ICACHE_INVALID_BYTE); @@ -69,6 +86,9 @@ using namespace Gen; #ifdef USE_VTUNE iJIT_NotifyEvent(iJVM_EVENT_TYPE_SHUTDOWN, nullptr); #endif + + if (m_perf_map_file.is_open()) + m_perf_map_file.close(); } // This clears the JIT cache. It's called from JitCache.cpp when the JIT cache @@ -179,6 +199,13 @@ using namespace Gen; jmethod.method_name = b.blockName; iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod); #endif + + if (m_perf_map_file.is_open()) + { + m_perf_map_file << StringFromFormat( + "%" PRIx64 " %x EmuCode_%x\n", + (u64)blockCodePointers[block_num], b.codeSize, b.originalAddress); + } } const u8 **JitBaseBlockCache::GetCodePointers() diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 0a79344309..d53fa16d43 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -119,6 +120,8 @@ class JitBaseBlockCache bool m_initialized; + std::ofstream m_perf_map_file; + bool RangeIntersect(int s1, int e1, int s2, int e2) const; void LinkBlockExits(int i); void LinkBlock(int i); From 9722ae2a5d3e3695b8f0bdeb6f03b5a6682ba3b8 Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Fri, 31 Oct 2014 20:12:54 +0100 Subject: [PATCH 2/3] Move the JIT registration logic in its own file Move the JITed function/basic-block 
registration logic out of the CPU subsystem in order to add JIT registration to JITed DSP and Video/VertexLoader code. This is necessary in order to add /tmp/perf-$pid.map support to other JITed code as they need to write to the same file. --- Source/Core/Common/CMakeLists.txt | 1 + Source/Core/Common/Common.vcxproj | 2 + Source/Core/Common/JitRegister.cpp | 115 ++++++++++++++++++ Source/Core/Common/JitRegister.h | 15 +++ .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 73 +---------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 7 -- 6 files changed, 138 insertions(+), 75 deletions(-) create mode 100644 Source/Core/Common/JitRegister.cpp create mode 100644 Source/Core/Common/JitRegister.h diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index 9a0d508f4c..ef0df028d4 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -6,6 +6,7 @@ set(SRCS BreakPoints.cpp GekkoDisassembler.cpp Hash.cpp IniFile.cpp + JitRegister.cpp MathUtil.cpp MemArena.cpp MemoryUtil.cpp diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index 5d7a31904d..8b192642b5 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -62,6 +62,7 @@ + @@ -97,6 +98,7 @@ + diff --git a/Source/Core/Common/JitRegister.cpp b/Source/Core/Common/JitRegister.cpp new file mode 100644 index 0000000000..3dafaac5d7 --- /dev/null +++ b/Source/Core/Common/JitRegister.cpp @@ -0,0 +1,115 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include +#include + +#include "Common/CommonTypes.h" +#include "Common/FileUtil.h" +#include "Common/JitRegister.h" +#include "Common/StringUtil.h" + +#ifdef _WIN32 +#include +#else +#include +#endif + +#if defined USE_OPROFILE && USE_OPROFILE +#include +#endif + +#if defined USE_VTUNE +#include +#pragma comment(lib, "libittnotify.lib") +#pragma comment(lib, "jitprofiling.lib") +#endif + +#if defined USE_OPROFILE && USE_OPROFILE +static op_agent_t s_agent = nullptr; +#endif + +static File::IOFile s_perf_map_file; + +namespace JitRegister +{ + +void Init() +{ +#if defined USE_OPROFILE && USE_OPROFILE + s_agent = op_open_agent(); +#endif + + const char* perf_dir = getenv("DOLPHIN_PERF_DIR"); + if (perf_dir && perf_dir[0]) + { + std::string filename = StringFromFormat("%s/perf-%d.map", perf_dir, getpid()); + s_perf_map_file.Open(filename, "w"); + // Disable buffering in order to avoid missing some mappings + // if the event of a crash: + std::setvbuf(s_perf_map_file.GetHandle(), NULL, _IONBF, 0); + } +} + +void Shutdown() +{ +#if defined USE_OPROFILE && USE_OPROFILE + op_close_agent(s_agent); + s_agent = nullptr; +#endif + +#ifdef USE_VTUNE + iJIT_NotifyEvent(iJVM_EVENT_TYPE_SHUTDOWN, nullptr); +#endif + + if (s_perf_map_file.IsOpen()) + s_perf_map_file.Close(); +} + +void Register(const void* base_address, u32 code_size, + const char* name, u32 original_address) +{ +#if !(defined USE_OPROFILE && USE_OPROFILE) && !defined(USE_VTUNE) + if (!s_perf_map_file.IsOpen()) + return; +#endif + + std::string symbol_name; + if (original_address) + symbol_name = StringFromFormat("%s_%x", name, original_address); + else + symbol_name = name; + +#if defined USE_OPROFILE && USE_OPROFILE + op_write_native_code(s_agent, symbol_name.data(), (u64)base_address, + base_address, code_size); +#endif + +#ifdef USE_VTUNE + iJIT_Method_Load jmethod = {0}; + jmethod.method_id = iJIT_GetNewMethodID(); + jmethod.class_file_name = ""; + 
jmethod.source_file_name = __FILE__; + jmethod.method_load_address = base_address; + jmethod.method_size = code_size; + jmethod.line_number_size = 0; + jmethod.method_name = symbol_name.data(); + iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod); +#endif + + // Linux perf /tmp/perf-$pid.map: + if (s_perf_map_file.IsOpen()) + { + std::string entry = StringFromFormat( + "%" PRIx64 " %x %s\n", + (u64)base_address, code_size, symbol_name.data()); + s_perf_map_file.WriteBytes(entry.data(), entry.size()); + } +} + +} diff --git a/Source/Core/Common/JitRegister.h b/Source/Core/Common/JitRegister.h new file mode 100644 index 0000000000..68e2687d8b --- /dev/null +++ b/Source/Core/Common/JitRegister.h @@ -0,0 +1,15 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +namespace JitRegister +{ + +void Init(); +void Shutdown(); +void Register(const void* base_address, u32 code_size, + const char* name, u32 original_address=0); + +} diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 0c680dc020..e9b0f52961 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -9,17 +9,10 @@ // performance hit, it's not enabled by default, but it's useful for // locating performance issues. 
-#include - -#ifdef _WIN32 -#include -#else -#include -#endif - #include "disasm.h" #include "Common/CommonTypes.h" +#include "Common/JitRegister.h" #include "Common/MemoryUtil.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/JitCommon/JitBase.h" @@ -28,18 +21,6 @@ #include #endif -#if defined USE_OPROFILE && USE_OPROFILE -#include - -op_agent_t agent; -#endif - -#if defined USE_VTUNE -#include -#pragma comment(lib, "libittnotify.lib") -#pragma comment(lib, "jitprofiling.lib") -#endif - using namespace Gen; bool JitBaseBlockCache::IsFull() const @@ -55,17 +36,7 @@ using namespace Gen; return; } -#if defined USE_OPROFILE && USE_OPROFILE - agent = op_open_agent(); -#endif - - const char* perf_dir = getenv("DOLPHIN_PERF_DIR"); - if (perf_dir && perf_dir[0]) - { - std::string filename = StringFromFormat("%s/perf-%d.map", - perf_dir, getpid()); - m_perf_map_file.open(filename, std::ios::trunc); - } + JitRegister::Init(); iCache.fill(JIT_ICACHE_INVALID_BYTE); iCacheEx.fill(JIT_ICACHE_INVALID_BYTE); @@ -79,16 +50,8 @@ using namespace Gen; { num_blocks = 0; m_initialized = false; -#if defined USE_OPROFILE && USE_OPROFILE - op_close_agent(agent); -#endif -#ifdef USE_VTUNE - iJIT_NotifyEvent(iJVM_EVENT_TYPE_SHUTDOWN, nullptr); -#endif - - if (m_perf_map_file.is_open()) - m_perf_map_file.close(); + JitRegister::Shutdown(); } // This clears the JIT cache. 
It's called from JitCache.cpp when the JIT cache @@ -178,34 +141,8 @@ using namespace Gen; LinkBlockExits(block_num); } -#if defined USE_OPROFILE && USE_OPROFILE - char buf[100]; - sprintf(buf, "EmuCode%x", b.originalAddress); - const u8* blockStart = blockCodePointers[block_num]; - op_write_native_code(agent, buf, (uint64_t)blockStart, - blockStart, b.codeSize); -#endif - -#ifdef USE_VTUNE - sprintf(b.blockName, "EmuCode_0x%08x", b.originalAddress); - - iJIT_Method_Load jmethod = {0}; - jmethod.method_id = iJIT_GetNewMethodID(); - jmethod.class_file_name = ""; - jmethod.source_file_name = __FILE__; - jmethod.method_load_address = (void*)blockCodePointers[block_num]; - jmethod.method_size = b.codeSize; - jmethod.line_number_size = 0; - jmethod.method_name = b.blockName; - iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod); -#endif - - if (m_perf_map_file.is_open()) - { - m_perf_map_file << StringFromFormat( - "%" PRIx64 " %x EmuCode_%x\n", - (u64)blockCodePointers[block_num], b.codeSize, b.originalAddress); - } + JitRegister::Register(blockCodePointers[block_num], b.codeSize, + "JIT_PPC", b.originalAddress); } const u8 **JitBaseBlockCache::GetCodePointers() diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index d53fa16d43..21984c8a70 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -57,10 +56,6 @@ struct JitBlock u64 ticStart; // for profiling - time. u64 ticStop; // for profiling - time. u64 ticCounter; // for profiling - time. 
- -#ifdef USE_VTUNE - char blockName[32]; -#endif }; typedef void (*CompiledCode)(); @@ -120,8 +115,6 @@ class JitBaseBlockCache bool m_initialized; - std::ofstream m_perf_map_file; - bool RangeIntersect(int s1, int e1, int s2, int e2) const; void LinkBlockExits(int i); void LinkBlock(int i); From 5b9aeaa686b0ffdf242a656f37ec34cf9a204f1d Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Mon, 24 Nov 2014 23:17:31 +0100 Subject: [PATCH 3/3] Use CLI argument for Linux perf JIT support --- Source/Core/Common/JitRegister.cpp | 8 ++++---- Source/Core/Core/CoreParameter.h | 2 ++ Source/Core/DolphinWX/Main.cpp | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Source/Core/Common/JitRegister.cpp b/Source/Core/Common/JitRegister.cpp index 3dafaac5d7..69ea7e5bba 100644 --- a/Source/Core/Common/JitRegister.cpp +++ b/Source/Core/Common/JitRegister.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -13,6 +12,7 @@ #include "Common/FileUtil.h" #include "Common/JitRegister.h" #include "Common/StringUtil.h" +#include "Core/ConfigManager.h" #ifdef _WIN32 #include @@ -45,10 +45,10 @@ void Init() s_agent = op_open_agent(); #endif - const char* perf_dir = getenv("DOLPHIN_PERF_DIR"); - if (perf_dir && perf_dir[0]) + const std::string& perf_dir = SConfig::GetInstance().m_LocalCoreStartupParameter.m_perfDir; + if (!perf_dir.empty()) { - std::string filename = StringFromFormat("%s/perf-%d.map", perf_dir, getpid()); + std::string filename = StringFromFormat("%s/perf-%d.map", perf_dir.data(), getpid()); s_perf_map_file.Open(filename, "w"); // Disable buffering in order to avoid missing some mappings // if the event of a crash: diff --git a/Source/Core/Core/CoreParameter.h b/Source/Core/Core/CoreParameter.h index 28686a55bc..f7c4f03fe5 100644 --- a/Source/Core/Core/CoreParameter.h +++ b/Source/Core/Core/CoreParameter.h @@ -243,6 +243,8 @@ struct SCoreStartupParameter std::string m_strGameIniDefaultRevisionSpecific; std::string 
m_strGameIniLocal; + std::string m_perfDir; + // Constructor just calls LoadDefaults SCoreStartupParameter(); diff --git a/Source/Core/DolphinWX/Main.cpp b/Source/Core/DolphinWX/Main.cpp index 28be496fc2..69aafbad35 100644 --- a/Source/Core/DolphinWX/Main.cpp +++ b/Source/Core/DolphinWX/Main.cpp @@ -142,10 +142,12 @@ bool DolphinApp::OnInit() bool UseLogger = false; bool selectVideoBackend = false; bool selectAudioEmulation = false; + bool selectPerfDir = false; wxString videoBackendName; wxString audioEmulationName; wxString userPath; + wxString perfDir; #if wxUSE_CMDLINE_PARSER // Parse command lines wxCmdLineEntryDesc cmdLineDesc[] = @@ -195,6 +197,11 @@ bool DolphinApp::OnInit() "User folder path", wxCMD_LINE_VAL_STRING, wxCMD_LINE_PARAM_OPTIONAL }, + { + wxCMD_LINE_OPTION, "P", "perf_dir", + "Directory for Lionux perf perf-$pid.map file", + wxCMD_LINE_VAL_STRING, wxCMD_LINE_PARAM_OPTIONAL + }, { wxCMD_LINE_NONE, nullptr, nullptr, nullptr, wxCMD_LINE_VAL_NONE, 0 } @@ -219,6 +226,7 @@ bool DolphinApp::OnInit() BatchMode = parser.Found("batch"); selectVideoBackend = parser.Found("video_backend", &videoBackendName); selectAudioEmulation = parser.Found("audio_emulation", &audioEmulationName); + selectPerfDir = parser.Found("perf_dir", &perfDir); playMovie = parser.Found("movie", &movieFile); if (parser.Found("user", &userPath)) @@ -255,6 +263,12 @@ bool DolphinApp::OnInit() UICommon::CreateDirectories(); UICommon::Init(); + if (selectPerfDir) + { + SConfig::GetInstance().m_LocalCoreStartupParameter.m_perfDir = + WxStrToStr(perfDir); + } + if (selectVideoBackend && videoBackendName != wxEmptyString) SConfig::GetInstance().m_LocalCoreStartupParameter.m_strVideoBackend = WxStrToStr(videoBackendName);