JIT: completely inline timer reading

Should be a bit faster on games that heavily use the timer.
This commit is contained in:
Fiora 2014-09-04 21:50:55 -07:00
parent b583879c2a
commit 2f9e9bf1fc
3 changed files with 24 additions and 9 deletions

View File

@ -50,13 +50,13 @@ static Event *eventPool = nullptr;
int slicelength; int slicelength;
static int maxSliceLength = MAX_SLICE_LENGTH; static int maxSliceLength = MAX_SLICE_LENGTH;
static s64 globalTimer;
static s64 idledCycles; static s64 idledCycles;
static u32 fakeDecStartValue; static u32 fakeDecStartValue;
static u64 fakeDecStartTicks; static u64 fakeDecStartTicks;
static u64 fakeTBStartValue;
static u64 fakeTBStartTicks; s64 globalTimer;
u64 fakeTBStartValue;
u64 fakeTBStartTicks;
static int ev_lost; static int ev_lost;

View File

@ -25,6 +25,10 @@ class PointerWrap;
namespace CoreTiming namespace CoreTiming
{ {
extern s64 globalTimer;
extern u64 fakeTBStartValue;
extern u64 fakeTBStartTicks;
void Init(); void Init();
void Shutdown(); void Shutdown();

View File

@ -173,15 +173,25 @@ void Jit64::mfspr(UGeckoInstruction inst)
// typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne // typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne
// to deal with possible timer wraparound. This makes the second two (out of three) completely // to deal with possible timer wraparound. This makes the second two (out of three) completely
// redundant for the JIT. // redundant for the JIT.
u32 registersInUse = CallerSavedRegistersInUse();
u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO; u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO;
ABI_PushRegistersAndAdjustStack(registersInUse, false); gpr.FlushLockX(EDX);
ABI_CallFunction((void *)&SystemTimers::GetFakeTimeBase);
ABI_PopRegistersAndAdjustStack(registersInUse, false); // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant.
MOV(64, R(RAX), M(&CoreTiming::globalTimer));
SUB(64, R(RAX), M(&CoreTiming::fakeTBStartTicks));
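// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67. The 64-bit MUL below leaves the high half of the
// 128-bit product in RDX (i.e. already >> 64), so shifting RDX right by 3 more yields >> 67.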
MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, R(RDX));
MOV(64, R(RAX), M(&CoreTiming::fakeTBStartValue));
SHR(64, R(RDX), Imm8(3));
// The timer can change within a long block, so add in any difference // The timer can change within a long block, so add in any difference
if (offset > 0) if (offset > 0)
ADD(64, R(RAX), Imm32(offset)); LEA(64, RAX, MComplex(RAX, RDX, SCALE_1, offset));
else
ADD(64, R(RAX), R(RDX));
MOV(64, M(&TL), R(RAX)); MOV(64, M(&TL), R(RAX));
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can. // if we can.
u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F); u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F);
@ -225,6 +235,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
break; break;
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
} }
void Jit64::mtmsr(UGeckoInstruction inst) void Jit64::mtmsr(UGeckoInstruction inst)