mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-15 13:57:57 -07:00
Fix all the current issues with the AArch64 register cache.
Fixes all the current issues I've been experiencing. Scaled back the register cache idea for now so I can actually work on some real instructions. Tested this work with unit tests so I know it works. Unit tests are pretty great things.
This commit is contained in:
parent
64b21a4812
commit
e883a6fb2e
@ -13,8 +13,12 @@ void Arm64RegCache::Init(ARM64XEmitter *emitter)
|
||||
GetAllocationOrder();
|
||||
}
|
||||
|
||||
ARM64Reg Arm64RegCache::GetReg(void)
|
||||
ARM64Reg Arm64RegCache::GetReg()
|
||||
{
|
||||
// If we have no registers left, dump the most stale register first
|
||||
if (!GetUnlockedRegisterCount())
|
||||
FlushMostStaleRegister();
|
||||
|
||||
for (auto& it : m_host_registers)
|
||||
{
|
||||
if (!it.IsLocked())
|
||||
@ -29,6 +33,15 @@ ARM64Reg Arm64RegCache::GetReg(void)
|
||||
return INVALID_REG;
|
||||
}
|
||||
|
||||
u32 Arm64RegCache::GetUnlockedRegisterCount()
|
||||
{
|
||||
u32 unlocked_registers = 0;
|
||||
for (auto& it : m_host_registers)
|
||||
if (!it.IsLocked())
|
||||
++unlocked_registers;
|
||||
return unlocked_registers;
|
||||
}
|
||||
|
||||
void Arm64RegCache::LockRegister(ARM64Reg host_reg)
|
||||
{
|
||||
auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
|
||||
@ -101,6 +114,81 @@ bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg)
|
||||
return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end();
|
||||
}
|
||||
|
||||
void Arm64GPRCache::FlushRegister(u32 preg)
|
||||
{
|
||||
u32 base_reg = preg;
|
||||
OpArg& reg = m_guest_registers[preg];
|
||||
if (reg.GetType() == REG_REG)
|
||||
{
|
||||
ARM64Reg host_reg = reg.GetReg();
|
||||
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
|
||||
Unlock(host_reg);
|
||||
|
||||
reg.Flush();
|
||||
}
|
||||
else if (reg.GetType() == REG_IMM)
|
||||
{
|
||||
ARM64Reg host_reg = GetReg();
|
||||
|
||||
m_emit->MOVI2R(host_reg, reg.GetImm());
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
|
||||
|
||||
Unlock(host_reg);
|
||||
|
||||
reg.Flush();
|
||||
}
|
||||
else if (reg.GetType() == REG_AWAY)
|
||||
{
|
||||
u32 next_reg = 0;
|
||||
if (reg.GetAwayLocation() == REG_LOW)
|
||||
next_reg = base_reg + 1;
|
||||
else
|
||||
next_reg = base_reg - 1;
|
||||
OpArg& reg2 = m_guest_registers[next_reg];
|
||||
ARM64Reg host_reg = reg.GetAwayReg();
|
||||
ARM64Reg host_reg_1 = reg.GetReg();
|
||||
ARM64Reg host_reg_2 = reg2.GetReg();
|
||||
// Flush if either of these shared registers are used.
|
||||
if (host_reg_1 == INVALID_REG)
|
||||
{
|
||||
// We never loaded this register
|
||||
// We've got to test the state of our shared register
|
||||
// Currently it is always reg+1
|
||||
if (host_reg_2 == INVALID_REG)
|
||||
{
|
||||
// We didn't load either of these registers
|
||||
// This can happen in cases where we had to flush register state
|
||||
// or if we hit an interpreted instruction before we could use it
|
||||
// Dump the whole thing in one go and flush both registers
|
||||
|
||||
// 64bit host register will store 2 32bit store registers in one go
|
||||
if (reg.GetAwayLocation() == REG_LOW)
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[base_reg]));
|
||||
else
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[next_reg]));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Alright, bottom register isn't used, but top one is
|
||||
// Only store the top one
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg_2, X29, PPCSTATE_OFF(gpr[next_reg]));
|
||||
Unlock(host_reg_2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg_1, X29, PPCSTATE_OFF(gpr[base_reg]));
|
||||
Unlock(host_reg_1);
|
||||
}
|
||||
// Flush both registers
|
||||
reg.Flush();
|
||||
reg2.Flush();
|
||||
Unlock(DecodeReg(host_reg));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
||||
{
|
||||
for (int i = 0; i < 32; ++i)
|
||||
@ -124,26 +212,12 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
||||
// Has to be flushed if it isn't in a callee saved register
|
||||
ARM64Reg host_reg = m_guest_registers[i].GetReg();
|
||||
if (flush || !IsCalleeSaved(host_reg))
|
||||
{
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i]));
|
||||
Unlock(host_reg);
|
||||
|
||||
m_guest_registers[i].Flush();
|
||||
}
|
||||
FlushRegister(i);
|
||||
}
|
||||
else if (m_guest_registers[i].GetType() == REG_IMM)
|
||||
{
|
||||
if (flush)
|
||||
{
|
||||
ARM64Reg host_reg = GetReg();
|
||||
|
||||
m_emit->MOVI2R(host_reg, m_guest_registers[i].GetImm());
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i]));
|
||||
|
||||
Unlock(host_reg);
|
||||
|
||||
m_guest_registers[i].Flush();
|
||||
}
|
||||
FlushRegister(i);
|
||||
}
|
||||
else if (m_guest_registers[i].GetType() == REG_AWAY)
|
||||
{
|
||||
@ -173,39 +247,7 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
||||
!IsCalleeSaved(host_reg_1) ||
|
||||
!IsCalleeSaved(host_reg_2))
|
||||
{
|
||||
|
||||
if (host_reg_1 == INVALID_REG)
|
||||
{
|
||||
// We never loaded this register
|
||||
// We've got to test the state of our shared register
|
||||
// Currently it is always reg+1
|
||||
if (host_reg_2 == INVALID_REG)
|
||||
{
|
||||
// We didn't load either of these registers
|
||||
// This can happen in cases where we had to flush register state
|
||||
// or if we hit an interpreted instruction before we could use it
|
||||
// Dump the whole thing in one go and flush both registers
|
||||
|
||||
// 64bit host register will store 2 32bit store registers in one go
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Alright, bottom register isn't used, but top one is
|
||||
// Only store the top one
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg_2, X29, PPCSTATE_OFF(gpr[i + 1]));
|
||||
Unlock(host_reg_2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->STR(INDEX_UNSIGNED, host_reg_1, X29, PPCSTATE_OFF(gpr[i]));
|
||||
Unlock(host_reg_1);
|
||||
}
|
||||
// Flush both registers
|
||||
m_guest_registers[i].Flush();
|
||||
m_guest_registers[i + 1].Flush();
|
||||
Unlock(DecodeReg(host_reg));
|
||||
FlushRegister(i); // Will flush both pairs of registers
|
||||
}
|
||||
// Skip the next register since we've handled it here
|
||||
++i;
|
||||
@ -216,6 +258,9 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
||||
ARM64Reg Arm64GPRCache::R(u32 preg)
|
||||
{
|
||||
OpArg& reg = m_guest_registers[preg];
|
||||
IncrementAllUsed();
|
||||
reg.ResetLastUsed();
|
||||
|
||||
switch (reg.GetType())
|
||||
{
|
||||
case REG_REG: // already in a reg
|
||||
@ -225,6 +270,8 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
|
||||
{
|
||||
ARM64Reg host_reg = GetReg();
|
||||
m_emit->MOVI2R(host_reg, reg.GetImm());
|
||||
reg.LoadToReg(host_reg);
|
||||
return host_reg;
|
||||
}
|
||||
break;
|
||||
case REG_AWAY: // Register is away in a shared register
|
||||
@ -232,22 +279,57 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
|
||||
// Let's do the voodoo that we dodo
|
||||
if (reg.GetReg() == INVALID_REG)
|
||||
{
|
||||
// Alright, we need to move to a valid location
|
||||
ARM64Reg host_reg = GetReg();
|
||||
reg.LoadAwayToReg(host_reg);
|
||||
|
||||
// Alright, we need to extract from our away register
|
||||
// To our new 32bit register
|
||||
if (reg.GetAwayLocation() == REG_LOW)
|
||||
{
|
||||
// We are in the low bits
|
||||
// Just move it over to the low bits of the new register
|
||||
m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 0, 31);
|
||||
OpArg& upper_reg = m_guest_registers[preg + 1];
|
||||
if (upper_reg.GetType() == REG_REG)
|
||||
{
|
||||
// If the upper reg is already moved away, just claim this one as ours now
|
||||
ARM64Reg host_reg = reg.GetAwayReg();
|
||||
reg.LoadToReg(DecodeReg(host_reg));
|
||||
return host_reg;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Top register is still loaded
|
||||
// Make sure to move to a new register
|
||||
ARM64Reg host_reg = GetReg();
|
||||
ARM64Reg current_reg = reg.GetAwayReg();
|
||||
reg.LoadToReg(host_reg);
|
||||
|
||||
// We are in the low bits
|
||||
// Just move it over to the low bits of the new register
|
||||
m_emit->UBFM(EncodeRegTo64(host_reg), current_reg, 0, 31);
|
||||
return host_reg;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// We are in the high bits
|
||||
m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 32, 63);
|
||||
OpArg& lower_reg = m_guest_registers[preg - 1];
|
||||
if (lower_reg.GetType() == REG_REG)
|
||||
{
|
||||
// If the lower register is moved away, claim this one as ours
|
||||
ARM64Reg host_reg = reg.GetAwayReg();
|
||||
reg.LoadToReg(DecodeReg(host_reg));
|
||||
|
||||
// Make sure to move our register from the high bits to the low bits
|
||||
m_emit->UBFM(EncodeRegTo64(host_reg), host_reg, 32, 63);
|
||||
return host_reg;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Load this register in to the new low bits
|
||||
// We are no longer away
|
||||
ARM64Reg host_reg = GetReg();
|
||||
ARM64Reg current_reg = reg.GetAwayReg();
|
||||
reg.LoadToReg(host_reg);
|
||||
|
||||
// We are in the high bits
|
||||
m_emit->UBFM(EncodeRegTo64(host_reg), current_reg, 32, 63);
|
||||
return host_reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -259,10 +341,8 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
|
||||
break;
|
||||
case REG_NOTLOADED: // Register isn't loaded at /all/
|
||||
{
|
||||
// This is kind of annoying, we shouldn't have gotten here
|
||||
// This can happen with instructions that use multiple registers(eg lmw)
|
||||
// The PPCAnalyst needs to be modified to handle these cases
|
||||
_dbg_assert_msg_(DYNA_REC, false, "Hit REG_NOTLOADED type oparg. Fix the PPCAnalyst");
|
||||
// This is a bit annoying. We try to keep these preloaded as much as possible
|
||||
// This can also happen on cases where PPCAnalyst isn't feeing us proper register usage statistics
|
||||
ARM64Reg host_reg = GetReg();
|
||||
reg.LoadToReg(host_reg);
|
||||
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
|
||||
@ -277,7 +357,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
|
||||
return INVALID_REG;
|
||||
}
|
||||
|
||||
void Arm64GPRCache::GetAllocationOrder(void)
|
||||
void Arm64GPRCache::GetAllocationOrder()
|
||||
{
|
||||
// Callee saved registers first in hopes that we will keep everything stored there first
|
||||
const std::vector<ARM64Reg> allocation_order =
|
||||
@ -292,6 +372,24 @@ void Arm64GPRCache::GetAllocationOrder(void)
|
||||
m_host_registers.push_back(HostReg(reg));
|
||||
}
|
||||
|
||||
void Arm64GPRCache::FlushMostStaleRegister()
|
||||
{
|
||||
u32 most_stale_preg = 0;
|
||||
u32 most_stale_amount = 0;
|
||||
for (u32 i = 0; i < 32; ++i)
|
||||
{
|
||||
u32 last_used = m_guest_registers[i].GetLastUsed();
|
||||
if (last_used > most_stale_amount &&
|
||||
m_guest_registers[i].GetType() != REG_IMM &&
|
||||
m_guest_registers[i].GetType() != REG_NOTLOADED)
|
||||
{
|
||||
most_stale_preg = i;
|
||||
most_stale_amount = last_used;
|
||||
}
|
||||
}
|
||||
FlushRegister(most_stale_preg);
|
||||
}
|
||||
|
||||
// FPR Cache
|
||||
void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
|
||||
{
|
||||
@ -303,7 +401,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg)
|
||||
// XXX: return a host reg holding a guest register
|
||||
}
|
||||
|
||||
void Arm64FPRCache::GetAllocationOrder(void)
|
||||
void Arm64FPRCache::GetAllocationOrder()
|
||||
{
|
||||
const std::vector<ARM64Reg> allocation_order =
|
||||
{
|
||||
@ -317,3 +415,8 @@ void Arm64FPRCache::GetAllocationOrder(void)
|
||||
m_host_registers.push_back(HostReg(reg));
|
||||
}
|
||||
|
||||
void Arm64FPRCache::FlushMostStaleRegister()
|
||||
{
|
||||
// XXX: Flush a register
|
||||
}
|
||||
|
||||
|
@ -42,10 +42,9 @@ class OpArg
|
||||
{
|
||||
public:
|
||||
OpArg()
|
||||
: m_type(REG_NOTLOADED), m_reg(INVALID_REG),
|
||||
m_value(0), m_last_used(0)
|
||||
{
|
||||
m_type = REG_NOTLOADED;
|
||||
m_reg = INVALID_REG;
|
||||
m_value = 0;
|
||||
}
|
||||
|
||||
RegType GetType()
|
||||
@ -73,30 +72,40 @@ public:
|
||||
{
|
||||
m_type = REG_REG;
|
||||
m_reg = reg;
|
||||
|
||||
m_away_reg = INVALID_REG;
|
||||
}
|
||||
void LoadToAway(ARM64Reg reg, RegLocation location)
|
||||
{
|
||||
m_type = REG_AWAY;
|
||||
m_reg = INVALID_REG;
|
||||
m_away_reg = reg;
|
||||
m_away_location = location;
|
||||
}
|
||||
void LoadAwayToReg(ARM64Reg reg)
|
||||
{
|
||||
// We are still an away type
|
||||
// We just are also in another register
|
||||
m_reg = reg;
|
||||
|
||||
m_reg = INVALID_REG;
|
||||
}
|
||||
void LoadToImm(u32 imm)
|
||||
{
|
||||
m_type = REG_IMM;
|
||||
m_value = imm;
|
||||
|
||||
m_reg = INVALID_REG;
|
||||
m_away_reg = INVALID_REG;
|
||||
}
|
||||
void Flush()
|
||||
{
|
||||
// Invalidate any previous information
|
||||
m_type = REG_NOTLOADED;
|
||||
m_reg = INVALID_REG;
|
||||
m_away_reg = INVALID_REG;
|
||||
|
||||
// Arbitrarily large value that won't roll over on a lot of increments
|
||||
m_last_used = 0xFFFF;
|
||||
}
|
||||
|
||||
u32 GetLastUsed() { return m_last_used; }
|
||||
void ResetLastUsed() { m_last_used = 0; }
|
||||
void IncrementLastUsed() { ++m_last_used; }
|
||||
|
||||
private:
|
||||
// For REG_REG
|
||||
RegType m_type; // store type
|
||||
@ -110,6 +119,8 @@ private:
|
||||
|
||||
// For REG_IMM
|
||||
u32 m_value; // IMM value
|
||||
|
||||
u32 m_last_used;
|
||||
};
|
||||
|
||||
class HostReg
|
||||
@ -117,10 +128,10 @@ class HostReg
|
||||
public:
|
||||
HostReg() : m_reg(INVALID_REG), m_locked(false) {}
|
||||
HostReg(ARM64Reg reg) : m_reg(reg), m_locked(false) {}
|
||||
bool IsLocked(void) { return m_locked; }
|
||||
void Lock(void) { m_locked = true; }
|
||||
void Unlock(void) { m_locked = false; }
|
||||
ARM64Reg GetReg(void) { return m_reg; }
|
||||
bool IsLocked() { return m_locked; }
|
||||
void Lock() { m_locked = true; }
|
||||
void Unlock() { m_locked = false; }
|
||||
ARM64Reg GetReg() { return m_reg; }
|
||||
|
||||
bool operator==(const ARM64Reg& reg)
|
||||
{
|
||||
@ -135,7 +146,7 @@ private:
|
||||
class Arm64RegCache
|
||||
{
|
||||
public:
|
||||
Arm64RegCache(void) : m_emit(nullptr), m_reg_stats(nullptr) {};
|
||||
Arm64RegCache() : m_emit(nullptr), m_reg_stats(nullptr) {};
|
||||
virtual ~Arm64RegCache() {};
|
||||
|
||||
void Init(ARM64XEmitter *emitter);
|
||||
@ -151,7 +162,7 @@ public:
|
||||
|
||||
// Returns a temporary register for use
|
||||
// Requires unlocking after done
|
||||
ARM64Reg GetReg(void);
|
||||
ARM64Reg GetReg();
|
||||
|
||||
// Locks a register so a cache cannot use it
|
||||
// Useful for function calls
|
||||
@ -177,7 +188,10 @@ public:
|
||||
|
||||
protected:
|
||||
// Get the order of the host registers
|
||||
virtual void GetAllocationOrder(void) = 0;
|
||||
virtual void GetAllocationOrder() = 0;
|
||||
|
||||
// Flushes the most stale register
|
||||
virtual void FlushMostStaleRegister() = 0;
|
||||
|
||||
// Lock a register
|
||||
void LockRegister(ARM64Reg host_reg);
|
||||
@ -185,6 +199,9 @@ protected:
|
||||
// Unlock a register
|
||||
void UnlockRegister(ARM64Reg host_reg);
|
||||
|
||||
// Get available host registers
|
||||
u32 GetUnlockedRegisterCount();
|
||||
|
||||
// Code emitter
|
||||
ARM64XEmitter *m_emit;
|
||||
|
||||
@ -220,7 +237,10 @@ public:
|
||||
|
||||
protected:
|
||||
// Get the order of the host registers
|
||||
void GetAllocationOrder(void);
|
||||
void GetAllocationOrder();
|
||||
|
||||
// Flushes the most stale register
|
||||
void FlushMostStaleRegister();
|
||||
|
||||
// Our guest GPRs
|
||||
// PowerPC has 32 GPRs
|
||||
@ -228,6 +248,14 @@ protected:
|
||||
|
||||
private:
|
||||
bool IsCalleeSaved(ARM64Reg reg);
|
||||
|
||||
void IncrementAllUsed()
|
||||
{
|
||||
for (auto& reg : m_guest_registers)
|
||||
reg.IncrementLastUsed();
|
||||
}
|
||||
|
||||
void FlushRegister(u32 preg);
|
||||
};
|
||||
|
||||
class Arm64FPRCache : public Arm64RegCache
|
||||
@ -243,7 +271,10 @@ public:
|
||||
|
||||
protected:
|
||||
// Get the order of the host registers
|
||||
void GetAllocationOrder(void);
|
||||
void GetAllocationOrder();
|
||||
|
||||
// Flushes the most stale register
|
||||
void FlushMostStaleRegister();
|
||||
|
||||
// Our guest FPRs
|
||||
// Gekko has 32 paired registers(32x2)
|
||||
|
Loading…
Reference in New Issue
Block a user