mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-25 07:09:48 -06:00
JitArm64: Reimplement aarch64 cycle counters.
CNTVCT_EL0 is force-enabled on all Linux platforms. Windows is untested, but as this is the best way to get *any* low-overhead performance counters, they likely use it as well.
This commit is contained in:
@ -1218,6 +1218,14 @@ void ARM64XEmitter::MRS(ARM64Reg Rt, PStateField field)
|
|||||||
EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));
|
EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a system-register read of CNTVCT_EL0 into the given register.
// Rt: destination register; must be a 64-bit register (asserted below).
void ARM64XEmitter::CNTVCT(Arm64Gen::ARM64Reg Rt)
|
||||||
|
{
|
||||||
|
_assert_msg_(DYNA_REC, Is64Bit(Rt), "CNTVCT: Rt must be 64-bit");
|
||||||
|
|
||||||
|
// MRS <Xt>, CNTVCT_EL0 ; Read CNTVCT_EL0 into Xt
|
||||||
|
// The first argument is written as `3 | 4`, mirroring the `o0 | 4` form used by MRS() above.
EncodeSystemInst(3 | 4, 3, 0xe, 0, 2, DecodeReg(Rt));
|
||||||
|
}
|
||||||
|
|
||||||
void ARM64XEmitter::HINT(SystemHint op)
|
void ARM64XEmitter::HINT(SystemHint op)
|
||||||
{
|
{
|
||||||
EncodeSystemInst(0, 3, 2, 0, op, WSP);
|
EncodeSystemInst(0, 3, 2, 0, op, WSP);
|
||||||
|
@ -603,9 +603,9 @@ public:
|
|||||||
|
|
||||||
// System
|
// System
|
||||||
void _MSR(PStateField field, u8 imm);
|
void _MSR(PStateField field, u8 imm);
|
||||||
|
|
||||||
void _MSR(PStateField field, ARM64Reg Rt);
|
void _MSR(PStateField field, ARM64Reg Rt);
|
||||||
void MRS(ARM64Reg Rt, PStateField field);
|
void MRS(ARM64Reg Rt, PStateField field);
|
||||||
|
void CNTVCT(ARM64Reg Rt);
|
||||||
|
|
||||||
void HINT(SystemHint op);
|
void HINT(SystemHint op);
|
||||||
void CLREX();
|
void CLREX();
|
||||||
|
@ -36,15 +36,6 @@ constexpr size_t SAFE_STACK_SIZE = 512 * 1024;
|
|||||||
constexpr size_t GUARD_SIZE = 0x10000; // two guards - bottom (permanent) and middle (see above)
|
constexpr size_t GUARD_SIZE = 0x10000; // two guards - bottom (permanent) and middle (see above)
|
||||||
constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;
|
constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;
|
||||||
|
|
||||||
// Reads PMUSERENR_EL0 with inline assembly and reports whether the bit
// named PMUSERENR_CR (0x4) is set.
// NOTE(review): this function appears only in the left (pre-change) column of this diff hunk.
static bool HasCycleCounters()
|
|
||||||
{
|
|
||||||
// Bit needs to be set to support cycle counters
|
|
||||||
const u32 PMUSERENR_CR = 0x4;
|
|
||||||
u32 reg;
|
|
||||||
asm("mrs %[val], PMUSERENR_EL0" : [val] "=r"(reg));
|
|
||||||
// Collapse the masked bit to a bool.
return !!(reg & PMUSERENR_CR);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::Init()
|
void JitArm64::Init()
|
||||||
{
|
{
|
||||||
InitializeInstructionTables();
|
InitializeInstructionTables();
|
||||||
@ -72,8 +63,6 @@ void JitArm64::Init()
|
|||||||
|
|
||||||
AllocStack();
|
AllocStack();
|
||||||
GenerateAsm();
|
GenerateAsm();
|
||||||
|
|
||||||
m_supports_cycle_counter = HasCycleCounters();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
||||||
@ -514,73 +503,47 @@ void JitArm64::DumpCode(const u8* start, const u8* end)
|
|||||||
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
|
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emits a sequence on PMCR_EL0 using X10 and X11 as scratch registers:
// _MSR with X10, load the E|P|C|LC mask into X11, OR it into X10, then MRS into X10.
// NOTE(review): a read-modify-write would be expected to MRS first and _MSR last;
// the emitted order here is the reverse — confirm against the emitter's MRS/_MSR semantics.
void JitArm64::EmitResetCycleCounters()
|
|
||||||
{
|
|
||||||
// Bit masks for PMCR_EL0 fields, named after the E, P, C and LC flags.
const u32 PMCR_EL0_E = 1;
|
|
||||||
const u32 PMCR_EL0_P = 2;
|
|
||||||
const u32 PMCR_EL0_C = 4;
|
|
||||||
const u32 PMCR_EL0_LC = 0x40;
|
|
||||||
_MSR(FIELD_PMCR_EL0, X10);
|
|
||||||
MOVI2R(X11, PMCR_EL0_E | PMCR_EL0_P | PMCR_EL0_C | PMCR_EL0_LC);
|
|
||||||
ORR(X10, X10, X11);
|
|
||||||
MRS(X10, FIELD_PMCR_EL0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emits a single _MSR on FIELD_PMCCNTR_EL0 with the given register.
// NOTE(review): the name suggests reading the cycle counter into `reg`, but _MSR
// (declared as `_MSR(PStateField, ARM64Reg)`) is used rather than MRS — confirm intent.
void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
|
|
||||||
{
|
|
||||||
_MSR(FIELD_PMCCNTR_EL0, reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::BeginTimeProfile(JitBlock* b)
|
void JitArm64::BeginTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
MOVP2R(X0, &b->profile_data);
|
MOVP2R(X0, &b->profile_data);
|
||||||
LDR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
|
LDR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
|
||||||
ADD(X1, X1, 1);
|
ADD(X1, X1, 1);
|
||||||
|
|
||||||
if (m_supports_cycle_counter)
|
// Fetch the current counter register
|
||||||
{
|
CNTVCT(X2);
|
||||||
EmitResetCycleCounters();
|
|
||||||
EmitGetCycles(X2);
|
|
||||||
|
|
||||||
// stores runCount and ticStart
|
// stores runCount and ticStart
|
||||||
STP(INDEX_UNSIGNED, X1, X2, X0, offsetof(JitBlock::ProfileData, runCount));
|
STP(INDEX_SIGNED, X1, X2, X0, offsetof(JitBlock::ProfileData, runCount));
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
STR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
|
|
||||||
|
|
||||||
MOVP2R(X1, &QueryPerformanceCounter);
|
|
||||||
ADD(X0, X0, offsetof(JitBlock::ProfileData, ticStart));
|
|
||||||
BLR(X1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::EndTimeProfile(JitBlock* b)
|
void JitArm64::EndTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
MOVP2R(X20, &b->profile_data);
|
ARM64Reg WA = gpr.GetReg();
|
||||||
if (m_supports_cycle_counter)
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
{
|
ARM64Reg WB = gpr.GetReg();
|
||||||
EmitGetCycles(X2);
|
ARM64Reg XB = EncodeRegTo64(WB);
|
||||||
}
|
ARM64Reg WC = gpr.GetReg();
|
||||||
else
|
ARM64Reg XC = EncodeRegTo64(WC);
|
||||||
{
|
ARM64Reg WD = gpr.GetReg();
|
||||||
MOVP2R(X1, &QueryPerformanceCounter);
|
ARM64Reg XD = EncodeRegTo64(WD);
|
||||||
ADD(X0, X20, offsetof(JitBlock::ProfileData, ticStop));
|
|
||||||
BLR(X1);
|
|
||||||
|
|
||||||
LDR(INDEX_UNSIGNED, X2, X20, offsetof(JitBlock::ProfileData, ticStop));
|
// Fetch the current counter register
|
||||||
}
|
CNTVCT(XB);
|
||||||
|
|
||||||
LDR(INDEX_UNSIGNED, X1, X20, offsetof(JitBlock::ProfileData, ticStart));
|
MOVP2R(XA, &b->profile_data);
|
||||||
|
|
||||||
|
LDR(INDEX_UNSIGNED, XC, XA, offsetof(JitBlock::ProfileData, ticStart));
|
||||||
|
SUB(XB, XB, XC);
|
||||||
|
|
||||||
// loads ticCounter and downcountCounter
|
// loads ticCounter and downcountCounter
|
||||||
LDP(INDEX_UNSIGNED, X3, X4, X20, offsetof(JitBlock::ProfileData, ticCounter));
|
LDP(INDEX_SIGNED, XC, XD, XA, offsetof(JitBlock::ProfileData, ticCounter));
|
||||||
SUB(X2, X2, X1);
|
ADD(XC, XC, XB);
|
||||||
ADD(X3, X3, X2);
|
ADDI2R(XD, XD, js.downcountAmount);
|
||||||
ADDI2R(X4, X4, js.downcountAmount);
|
|
||||||
|
|
||||||
// stores ticCounter and downcountCounter
|
// stores ticCounter and downcountCounter
|
||||||
STP(INDEX_UNSIGNED, X3, X4, X20, offsetof(JitBlock::ProfileData, ticCounter));
|
STP(INDEX_SIGNED, XC, XD, XA, offsetof(JitBlock::ProfileData, ticCounter));
|
||||||
|
|
||||||
|
gpr.Unlock(WA, WB, WC, WD);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::Run()
|
void JitArm64::Run()
|
||||||
|
@ -174,9 +174,6 @@ private:
|
|||||||
static void InitializeInstructionTables();
|
static void InitializeInstructionTables();
|
||||||
void CompileInstruction(PPCAnalyst::CodeOp& op);
|
void CompileInstruction(PPCAnalyst::CodeOp& op);
|
||||||
|
|
||||||
void EmitResetCycleCounters();
|
|
||||||
void EmitGetCycles(Arm64Gen::ARM64Reg reg);
|
|
||||||
|
|
||||||
// Simple functions to switch between near and far code emitting
|
// Simple functions to switch between near and far code emitting
|
||||||
void SwitchToFarCode()
|
void SwitchToFarCode()
|
||||||
{
|
{
|
||||||
@ -253,9 +250,6 @@ private:
|
|||||||
Arm64Gen::ARM64CodeBlock farcode;
|
Arm64Gen::ARM64CodeBlock farcode;
|
||||||
u8* nearcode; // Backed up when we switch to far code.
|
u8* nearcode; // Backed up when we switch to far code.
|
||||||
|
|
||||||
// Do we support cycle counter profiling?
|
|
||||||
bool m_supports_cycle_counter;
|
|
||||||
|
|
||||||
bool m_enable_blr_optimization;
|
bool m_enable_blr_optimization;
|
||||||
bool m_cleanup_after_stackfault = false;
|
bool m_cleanup_after_stackfault = false;
|
||||||
u8* m_stack_base = nullptr;
|
u8* m_stack_base = nullptr;
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
namespace Profiler
|
namespace Profiler
|
||||||
{
|
{
|
||||||
bool g_ProfileBlocks;
|
bool g_ProfileBlocks = false;
|
||||||
|
|
||||||
void WriteProfileResults(const std::string& filename)
|
void WriteProfileResults(const std::string& filename)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user