mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-14 21:37:52 -07:00
Merge pull request #2809 from Sonicadvance1/aarch64_cycle_counter_profiling
[AArch64] Support profiling by cycle counters if they are available to EL0
This commit is contained in:
commit
fe1c6cba36
@ -1102,6 +1102,12 @@ static void GetSystemReg(PStateField field, int &o0, int &op1, int &CRn, int &CR
|
|||||||
case FIELD_FPSR:
|
case FIELD_FPSR:
|
||||||
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;
|
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;
|
||||||
break;
|
break;
|
||||||
|
case FIELD_PMCR_EL0:
|
||||||
|
o0 = 3; op1 = 3; CRn = 9; CRm = 6; op2 = 0;
|
||||||
|
break;
|
||||||
|
case FIELD_PMCCNTR_EL0:
|
||||||
|
o0 = 3; op1 = 3; CRn = 9; CRm = 7; op2 = 0;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
_assert_msg_(DYNA_REC, false, "Invalid PStateField to do a register move from/to");
|
_assert_msg_(DYNA_REC, false, "Invalid PStateField to do a register move from/to");
|
||||||
break;
|
break;
|
||||||
|
@ -172,6 +172,8 @@ enum PStateField
|
|||||||
FIELD_DAIFSet,
|
FIELD_DAIFSet,
|
||||||
FIELD_DAIFClr,
|
FIELD_DAIFClr,
|
||||||
FIELD_NZCV, // The only system registers accessible from EL0 (user space)
|
FIELD_NZCV, // The only system registers accessible from EL0 (user space)
|
||||||
|
FIELD_PMCR_EL0,
|
||||||
|
FIELD_PMCCNTR_EL0,
|
||||||
FIELD_FPCR = 0x340,
|
FIELD_FPCR = 0x340,
|
||||||
FIELD_FPSR = 0x341,
|
FIELD_FPSR = 0x341,
|
||||||
};
|
};
|
||||||
@ -809,6 +811,7 @@ public:
|
|||||||
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void FCVTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
@ -15,6 +15,15 @@
|
|||||||
using namespace Arm64Gen;
|
using namespace Arm64Gen;
|
||||||
|
|
||||||
static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
|
static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
|
||||||
|
static bool HasCycleCounters()
|
||||||
|
{
|
||||||
|
// Bit needs to be set to support cycle counters
|
||||||
|
const u32 PMUSERENR_CR = 0x4;
|
||||||
|
u32 reg;
|
||||||
|
asm ("mrs %[val], PMUSERENR_EL0"
|
||||||
|
: [val] "=r" (reg));
|
||||||
|
return !!(reg & PMUSERENR_CR);
|
||||||
|
}
|
||||||
|
|
||||||
void JitArm64::Init()
|
void JitArm64::Init()
|
||||||
{
|
{
|
||||||
@ -34,6 +43,8 @@ void JitArm64::Init()
|
|||||||
code_block.m_gpa = &js.gpa;
|
code_block.m_gpa = &js.gpa;
|
||||||
code_block.m_fpa = &js.fpa;
|
code_block.m_fpa = &js.fpa;
|
||||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
||||||
|
|
||||||
|
m_supports_cycle_counter = HasCycleCounters();
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ClearCache()
|
void JitArm64::ClearCache()
|
||||||
@ -233,26 +244,65 @@ void JitArm64::DumpCode(const u8* start, const u8* end)
|
|||||||
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
|
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void JitArm64::EmitResetCycleCounters()
|
||||||
|
{
|
||||||
|
const u32 PMCR_EL0_E = 1;
|
||||||
|
const u32 PMCR_EL0_P = 2;
|
||||||
|
const u32 PMCR_EL0_C = 4;
|
||||||
|
const u32 PMCR_EL0_LC = 0x40;
|
||||||
|
_MSR(FIELD_PMCR_EL0, X0);
|
||||||
|
MOVI2R(X1, PMCR_EL0_E |
|
||||||
|
PMCR_EL0_P |
|
||||||
|
PMCR_EL0_C |
|
||||||
|
PMCR_EL0_LC);
|
||||||
|
ORR(X0, X0, X1);
|
||||||
|
MRS(X0, FIELD_PMCR_EL0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits code that reads the PMU cycle counter (PMCCNTR_EL0) into `reg`.
//
// reg: destination general-purpose register for the cycle count; it is
//      overwritten by the emitted instruction.
void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
{
	// MRS moves a system register into a GPR. The MSR form would instead
	// overwrite the counter with the contents of `reg`, leaving `reg`
	// unchanged for the caller that stores it as a timestamp.
	MRS(reg, FIELD_PMCCNTR_EL0);
}
|
||||||
|
|
||||||
// Clears the profiling fields of `b` and emits the code that records the
// block's start timestamp into b->ticStart.
//
// When cycle counters are supported the timestamp comes from the PMU cycle
// counter; otherwise the emitted code calls QueryPerformanceCounter with the
// address of b->ticStart in X0. The emitted code uses X0 and X1 as scratch.
void JitArm64::BeginTimeProfile(JitBlock* b)
{
	// Start from a clean slate for this block's statistics.
	b->ticStop = 0;
	b->ticStart = 0;
	b->ticCounter = 0;

	if (!m_supports_cycle_counter)
	{
		// Fallback path: call the timer routine, passing &b->ticStart in X0.
		MOVI2R(X1, (u64)QueryPerformanceCounter);
		MOVI2R(X0, (u64)&b->ticStart);
		BLR(X1);
		return;
	}

	// Cycle counter path: reset the counters, sample the cycle count into X1
	// and store it to b->ticStart.
	EmitResetCycleCounters();
	EmitGetCycles(X1);
	MOVI2R(X0, (u64)&b->ticStart);
	STR(INDEX_UNSIGNED, X1, X0, 0);
}
|
||||||
|
|
||||||
void JitArm64::EndTimeProfile(JitBlock* b)
|
void JitArm64::EndTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
|
if (m_supports_cycle_counter)
|
||||||
|
{
|
||||||
|
EmitGetCycles(X2);
|
||||||
|
MOVI2R(X0, (u64)&b->ticStart);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
MOVI2R(X1, (u64)QueryPerformanceCounter);
|
MOVI2R(X1, (u64)QueryPerformanceCounter);
|
||||||
MOVI2R(X0, (u64)&b->ticStop);
|
MOVI2R(X0, (u64)&b->ticStop);
|
||||||
BLR(X1);
|
BLR(X1);
|
||||||
|
|
||||||
MOVI2R(X0, (u64)&b->ticStart);
|
MOVI2R(X0, (u64)&b->ticStart);
|
||||||
LDR(INDEX_UNSIGNED, X1, X0, 0); // Start
|
|
||||||
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
|
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
|
||||||
|
}
|
||||||
|
|
||||||
|
LDR(INDEX_UNSIGNED, X1, X0, 0); // Start
|
||||||
LDR(INDEX_UNSIGNED, X3, X0, 16); // Counter
|
LDR(INDEX_UNSIGNED, X3, X0, 16); // Counter
|
||||||
SUB(X2, X2, X1);
|
SUB(X2, X2, X1);
|
||||||
ADD(X3, X3, X2);
|
ADD(X3, X3, X2);
|
||||||
|
@ -238,6 +238,12 @@ private:
|
|||||||
Arm64Gen::ARM64CodeBlock farcode;
|
Arm64Gen::ARM64CodeBlock farcode;
|
||||||
u8* nearcode; // Backed up when we switch to far code.
|
u8* nearcode; // Backed up when we switch to far code.
|
||||||
|
|
||||||
|
// Do we support cycle counter profiling?
|
||||||
|
bool m_supports_cycle_counter;
|
||||||
|
|
||||||
|
void EmitResetCycleCounters();
|
||||||
|
void EmitGetCycles(Arm64Gen::ARM64Reg reg);
|
||||||
|
|
||||||
// Simple functions to switch between near and far code emitting
|
// Simple functions to switch between near and far code emitting
|
||||||
void SwitchToFarCode()
|
void SwitchToFarCode()
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user