From 2f9e9bf1fc85b5a4c4e4edf56c6bcd1f1aa41a73 Mon Sep 17 00:00:00 2001 From: Fiora Date: Thu, 4 Sep 2014 21:50:55 -0700 Subject: [PATCH] JIT: completely inline timer reading Should be a bit faster on games that heavily use the timer. --- Source/Core/Core/CoreTiming.cpp | 8 +++---- Source/Core/Core/CoreTiming.h | 4 ++++ .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 21 ++++++++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index d64a39b51d..30ac74585a 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -50,13 +50,13 @@ static Event *eventPool = nullptr; int slicelength; static int maxSliceLength = MAX_SLICE_LENGTH; -static s64 globalTimer; static s64 idledCycles; - static u32 fakeDecStartValue; static u64 fakeDecStartTicks; -static u64 fakeTBStartValue; -static u64 fakeTBStartTicks; + +s64 globalTimer; +u64 fakeTBStartValue; +u64 fakeTBStartTicks; static int ev_lost; diff --git a/Source/Core/Core/CoreTiming.h b/Source/Core/Core/CoreTiming.h index 15a3a09d89..7eb3784ae1 100644 --- a/Source/Core/Core/CoreTiming.h +++ b/Source/Core/Core/CoreTiming.h @@ -25,6 +25,10 @@ class PointerWrap; namespace CoreTiming { +extern s64 globalTimer; +extern u64 fakeTBStartValue; +extern u64 fakeTBStartTicks; + void Init(); void Shutdown(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index af2fe33bb8..75d6da740c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -173,15 +173,25 @@ void Jit64::mfspr(UGeckoInstruction inst) // typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne // to deal with possible timer wraparound. This makes the second two (out of three) completely // redundant for the JIT. - u32 registersInUse = CallerSavedRegistersInUse(); u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO; - ABI_PushRegistersAndAdjustStack(registersInUse, false); - ABI_CallFunction((void *)&SystemTimers::GetFakeTimeBase); - ABI_PopRegistersAndAdjustStack(registersInUse, false); + gpr.FlushLockX(EDX); + + // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the + // cost of calling out to C for this is actually significant. + MOV(64, R(RAX), M(&CoreTiming::globalTimer)); + SUB(64, R(RAX), M(&CoreTiming::fakeTBStartTicks)); + // a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67 + MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL)); + MUL(64, R(RDX)); + MOV(64, R(RAX), M(&CoreTiming::fakeTBStartValue)); + SHR(64, R(RDX), Imm8(3)); // The timer can change within a long block, so add in any difference if (offset > 0) - ADD(64, R(RAX), Imm32(offset)); + LEA(64, RAX, MComplex(RAX, RDX, SCALE_1, offset)); + else + ADD(64, R(RAX), R(RDX)); MOV(64, M(&TL), R(RAX)); + // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // if we can. u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F); @@ -225,6 +235,7 @@ void Jit64::mfspr(UGeckoInstruction inst) break; } gpr.UnlockAll(); + gpr.UnlockAllX(); } void Jit64::mtmsr(UGeckoInstruction inst)