JitRegCache: Separate FPURegCache and GPRRegCache into their own files

Keeps them separate from one another and makes it easier to find
member functions (previously they were spread out all over JitRegCache.cpp).
Lioncash 2016-12-20 21:59:53 -05:00
parent 10b4f6a250
commit 1a34376f3a
10 changed files with 293 additions and 236 deletions

Source/Core/Core/CMakeLists.txt

@@ -196,6 +196,8 @@ if(_M_X86)
   PowerPC/Jit64IL/IR_X86.cpp
   PowerPC/Jit64IL/JitIL.cpp
   PowerPC/Jit64IL/JitIL_Tables.cpp
+  PowerPC/Jit64/FPURegCache.cpp
+  PowerPC/Jit64/GPRRegCache.cpp
   PowerPC/Jit64/Jit64_Tables.cpp
   PowerPC/Jit64/JitAsm.cpp
   PowerPC/Jit64/Jit_Branch.cpp

Source/Core/Core/Core.vcxproj

@@ -225,6 +225,8 @@
     <ClCompile Include="PowerPC\Jit64IL\IR_X86.cpp" />
     <ClCompile Include="PowerPC\Jit64IL\JitIL.cpp" />
     <ClCompile Include="PowerPC\Jit64IL\JitIL_Tables.cpp" />
+    <ClCompile Include="PowerPC\Jit64\FPURegCache.cpp" />
+    <ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp" />
     <ClCompile Include="PowerPC\Jit64\Jit.cpp" />
     <ClCompile Include="PowerPC\Jit64\Jit64_Tables.cpp" />
     <ClCompile Include="PowerPC\Jit64\JitAsm.cpp" />
@@ -427,6 +429,8 @@
     <ClInclude Include="PowerPC\Interpreter\Interpreter_Tables.h" />
     <ClInclude Include="PowerPC\Jit64IL\JitIL.h" />
     <ClInclude Include="PowerPC\Jit64IL\JitIL_Tables.h" />
+    <ClInclude Include="PowerPC\Jit64\FPURegCache.h" />
+    <ClInclude Include="PowerPC\Jit64\GPRRegCache.h" />
     <ClInclude Include="PowerPC\Jit64\Jit.h" />
     <ClInclude Include="PowerPC\Jit64\Jit64_Tables.h" />
     <ClInclude Include="PowerPC\Jit64\JitAsm.h" />

Source/Core/Core/Core.vcxproj.filters

@@ -666,6 +666,12 @@
     <ClCompile Include="PowerPC\Jit64IL\JitIL_Tables.cpp">
       <Filter>PowerPC\JitIL</Filter>
     </ClCompile>
+    <ClCompile Include="PowerPC\Jit64\FPURegCache.cpp">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClCompile>
+    <ClCompile Include="PowerPC\Jit64\GPRRegCache.cpp">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClCompile>
     <ClCompile Include="PowerPC\Jit64\Jit_Branch.cpp">
       <Filter>PowerPC\Jit64</Filter>
     </ClCompile>
@@ -1262,6 +1268,12 @@
     <ClInclude Include="PowerPC\Jit64IL\JitIL_Tables.h">
       <Filter>PowerPC\JitIL</Filter>
     </ClInclude>
+    <ClInclude Include="PowerPC\Jit64\FPURegCache.h">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClInclude>
+    <ClInclude Include="PowerPC\Jit64\GPRRegCache.h">
+      <Filter>PowerPC\Jit64</Filter>
+    </ClInclude>
     <ClInclude Include="PowerPC\Jit64\JitRegCache.h">
       <Filter>PowerPC\Jit64</Filter>
     </ClInclude>

Source/Core/Core/PowerPC/Jit64/FPURegCache.cpp

@@ -0,0 +1,53 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64/FPURegCache.h"
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+
+using namespace Gen;
+
+void FPURegCache::StoreRegister(size_t preg, const OpArg& new_loc)
+{
+  emit->MOVAPD(new_loc, regs[preg].location.GetSimpleReg());
+}
+
+void FPURegCache::LoadRegister(size_t preg, X64Reg new_loc)
+{
+  emit->MOVAPD(new_loc, regs[preg].location);
+}
+
+const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
+{
+  static const X64Reg allocation_order[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12,
+                                            XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
+  *count = sizeof(allocation_order) / sizeof(X64Reg);
+  return allocation_order;
+}
+
+OpArg FPURegCache::GetDefaultLocation(size_t reg) const
+{
+  return PPCSTATE(ps[reg][0]);
+}
+
+BitSet32 FPURegCache::GetRegUtilization()
+{
+  return jit->js.op->gprInReg;
+}
+
+BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
+{
+  BitSet32 regs_used;
+
+  for (u32 i = 1; i < lookahead; i++)
+  {
+    BitSet32 regs_in = jit->js.op[i].fregsIn;
+    regs_used |= regs_in;
+    if (regs_in[preg])
+      return regs_used;
+  }
+
+  return regs_used;
+}
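The lookahead cutoff is the interesting part of CountRegsIn: starting at the *next* op, it accumulates every register the upcoming ops read and stops as soon as the register being scored is needed again. A minimal standalone sketch of that loop (a plain uint32_t bitmask stands in for Dolphin's BitSet32, and the op data is invented; this is an illustration, not Dolphin code):

#include <bit>
#include <cstdint>
#include <cstdio>
#include <vector>

using RegSet = uint32_t;  // one bit per guest register

// Mirrors the loop above: start at i = 1 (the next op), accumulate reads,
// and bail out once `preg` itself shows up in an op's read set.
RegSet CountRegsIn(const std::vector<RegSet>& op_reads, unsigned preg, unsigned lookahead)
{
  RegSet regs_used = 0;
  for (unsigned i = 1; i < lookahead && i < op_reads.size(); i++)
  {
    regs_used |= op_reads[i];
    if (op_reads[i] & (1u << preg))
      return regs_used;
  }
  return regs_used;
}

int main()
{
  // Hypothetical block: op 1 reads f4/f5, op 2 reads f6, op 3 reads f3 again.
  std::vector<RegSet> op_reads = {0, (1u << 4) | (1u << 5), 1u << 6, 1u << 3};
  RegSet used = CountRegsIn(op_reads, /*preg=*/3, /*lookahead=*/8);
  // Prints 4: f4, f5, f6, plus f3's own read (regs_used includes the hit).
  std::printf("%d registers accumulated before f3 is needed again\n", std::popcount(used));
  return 0;
}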

Source/Core/Core/PowerPC/Jit64/FPURegCache.h

@@ -0,0 +1,18 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Core/PowerPC/Jit64/JitRegCache.h"
+
+class FPURegCache final : public RegCache
+{
+public:
+  void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
+  void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
+  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
+  Gen::OpArg GetDefaultLocation(size_t reg) const override;
+  BitSet32 GetRegUtilization() override;
+  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
+};

Source/Core/Core/PowerPC/Jit64/GPRRegCache.cpp

@@ -0,0 +1,71 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64/GPRRegCache.h"
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+
+using namespace Gen;
+
+void GPRRegCache::StoreRegister(size_t preg, const OpArg& new_loc)
+{
+  emit->MOV(32, new_loc, regs[preg].location);
+}
+
+void GPRRegCache::LoadRegister(size_t preg, X64Reg new_loc)
+{
+  emit->MOV(32, ::Gen::R(new_loc), regs[preg].location);
+}
+
+OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
+{
+  return PPCSTATE(gpr[reg]);
+}
+
+const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
+{
+  static const X64Reg allocation_order[] = {
+// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
+// this.
+#ifdef _WIN32
+      RSI, RDI, R13, R14, R15, R8,
+      R9, R10, R11, R12, RCX
+#else
+      R12, R13, R14, R15, RSI, RDI,
+      R8, R9, R10, R11, RCX
+#endif
+  };
+  *count = sizeof(allocation_order) / sizeof(X64Reg);
+  return allocation_order;
+}
+
+void GPRRegCache::SetImmediate32(size_t preg, u32 imm_value, bool dirty)
+{
+  // "dirty" can be false to avoid redundantly flushing an immediate when
+  // processing speculative constants.
+  DiscardRegContentsIfCached(preg);
+  regs[preg].away |= dirty;
+  regs[preg].location = Imm32(imm_value);
+}
+
+BitSet32 GPRRegCache::GetRegUtilization()
+{
+  return jit->js.op->gprInReg;
+}
+
+BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
+{
+  BitSet32 regs_used;
+
+  for (u32 i = 1; i < lookahead; i++)
+  {
+    BitSet32 regs_in = jit->js.op[i].regsIn;
+    regs_used |= regs_in;
+    if (regs_in[preg])
+      return regs_used;
+  }
+
+  return regs_used;
+}
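Two details here are worth unpacking. The R12 warning is most likely an x86-64 encoding quirk: like RSP, R12 in the base field of a memory operand forces an extra SIB byte, so address generation through R12 can encode longer than through other registers. And SetImmediate32 shows how the cache can hold a guest register as a constant without binding any host register; `away |= dirty` records whether PPCSTATE still needs updating at flush time. A toy model of just that bookkeeping (invented struct, not Dolphin's real cached-register type, and the DiscardRegContentsIfCached step is elided):

#include <cassert>
#include <cstdint>
#include <cstdio>

struct ToyCachedReg
{
  bool away = false;    // cached value differs from PPCSTATE in memory
  bool is_imm = false;  // location holds an immediate rather than an x64 reg
  uint32_t imm = 0;
};

// Mimics GPRRegCache::SetImmediate32: record the constant; dirty == false
// (speculative constants) leaves `away` alone so flush skips the writeback.
void SetImmediate32(ToyCachedReg& r, uint32_t value, bool dirty = true)
{
  r.is_imm = true;
  r.imm = value;
  r.away |= dirty;
}

int main()
{
  ToyCachedReg r5;
  SetImmediate32(r5, 100, /*dirty=*/false);  // speculative: memory already holds 100
  assert(!r5.away);                          // flush would store nothing
  SetImmediate32(r5, 42);                    // e.g. a real `li r5, 42`
  assert(r5.away);                           // flush must write 42 back
  std::printf("r5 = %u, writeback needed: %d\n", (unsigned)r5.imm, r5.away);
  return 0;
}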

Source/Core/Core/PowerPC/Jit64/GPRRegCache.h

@@ -0,0 +1,19 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Core/PowerPC/Jit64/JitRegCache.h"
+
+class GPRRegCache final : public RegCache
+{
+public:
+  void StoreRegister(size_t preg, const Gen::OpArg& new_loc) override;
+  void LoadRegister(size_t preg, Gen::X64Reg new_loc) override;
+  Gen::OpArg GetDefaultLocation(size_t reg) const override;
+  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
+  void SetImmediate32(size_t preg, u32 imm_value, bool dirty = true);
+  BitSet32 GetRegUtilization() override;
+  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
+};
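With both headers in place, the shape of the design is easier to see: RegCache owns the allocation policy (GetFreeXReg, ScoreRegister, Flush) while the two subclasses supply only the ISA-specific hooks — which host registers to prefer, which instruction moves a value, and where the guest register lives in PPCSTATE. A compressed sketch of that template-method pattern, with invented toy names rather than the real API:

#include <array>
#include <cstddef>
#include <cstdio>

class ToyRegCache
{
public:
  ToyRegCache() { free_.fill(true); }
  virtual ~ToyRegCache() = default;

  // Shared policy: hand out the first free register in the subclass's
  // preferred order; the real GetFreeXReg falls back to ScoreRegister-based
  // eviction when nothing is free.
  int GetFreeReg() const
  {
    size_t count = 0;
    const int* order = GetAllocationOrder(&count);
    for (size_t i = 0; i < count; i++)
      if (free_[order[i]])
        return order[i];
    return -1;
  }

protected:
  virtual const int* GetAllocationOrder(size_t* count) const = 0;
  std::array<bool, 16> free_{};
};

class ToyGPRCache final : public ToyRegCache
{
protected:
  const int* GetAllocationOrder(size_t* count) const override
  {
    static const int order[] = {12, 13, 14, 15};  // analogous to RSI, RDI, R13...
    *count = sizeof(order) / sizeof(order[0]);
    return order;
  }
};

int main()
{
  ToyGPRCache gpr;
  std::printf("allocator picks host reg %d first\n", gpr.GetFreeReg());
  return 0;
}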

Source/Core/Core/PowerPC/Jit64/Jit.h

@@ -21,6 +21,8 @@
 #include "Common/CommonTypes.h"
 #include "Common/x64ABI.h"
 #include "Common/x64Emitter.h"
+#include "Core/PowerPC/Jit64/FPURegCache.h"
+#include "Core/PowerPC/Jit64/GPRRegCache.h"
 #include "Core/PowerPC/Jit64/JitAsm.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
 #include "Core/PowerPC/Jit64Common/Jit64Base.h"

Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp

@@ -14,7 +14,6 @@
 #include "Common/x64Emitter.h"
 #include "Core/PowerPC/Jit64/Jit.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
-#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/PowerPC.h"

 using namespace Gen;
@@ -57,128 +56,17 @@ void RegCache::Start()
   // But only preload IF written OR reads >= 3
 }

-void RegCache::UnlockAll()
-{
-  for (auto& reg : regs)
-    reg.locked = false;
-}
-
-void RegCache::UnlockAllX()
-{
-  for (auto& xreg : xregs)
-    xreg.locked = false;
-}
-
-BitSet32 GPRRegCache::GetRegUtilization()
-{
-  return jit->js.op->gprInReg;
-}
-
-BitSet32 FPURegCache::GetRegUtilization()
-{
-  return jit->js.op->gprInReg;
-}
-
-BitSet32 GPRRegCache::CountRegsIn(size_t preg, u32 lookahead)
-{
-  BitSet32 regsUsed;
-  for (u32 i = 1; i < lookahead; i++)
-  {
-    BitSet32 regsIn = jit->js.op[i].regsIn;
-    regsUsed |= regsIn;
-    if (regsIn[preg])
-      return regsUsed;
-  }
-  return regsUsed;
-}
-
-BitSet32 FPURegCache::CountRegsIn(size_t preg, u32 lookahead)
-{
-  BitSet32 regsUsed;
-  for (u32 i = 1; i < lookahead; i++)
-  {
-    BitSet32 regsIn = jit->js.op[i].fregsIn;
-    regsUsed |= regsIn;
-    if (regsIn[preg])
-      return regsUsed;
-  }
-  return regsUsed;
-}
-
-// Estimate roughly how bad it would be to de-allocate this register. Higher score
-// means more bad.
-float RegCache::ScoreRegister(X64Reg xr)
-{
-  size_t preg = xregs[xr].ppcReg;
-  float score = 0;
-
-  // If it's not dirty, we don't need a store to write it back to the register file, so
-  // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
-  // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
-  // to the number of extra stores it causes.
-  if (xregs[xr].dirty)
-    score += 2;
-
-  // If the register isn't actually needed in a physical register for a later instruction,
-  // writing it back to the register file isn't quite as bad.
-  if (GetRegUtilization()[preg])
-  {
-    // Don't look too far ahead; we don't want to have quadratic compilation times for
-    // enormous block sizes!
-    // This actually improves register allocation a tiny bit; I'm not sure why.
-    u32 lookahead = std::min(jit->js.instructionsLeft, 64);
-    // Count how many other registers are going to be used before we need this one again.
-    u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
-    // Totally ad-hoc heuristic to bias based on how many other registers we'll need
-    // before this one gets used again.
-    score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
-  }
-
-  return score;
-}
-
-X64Reg RegCache::GetFreeXReg()
-{
-  size_t aCount;
-  const X64Reg* aOrder = GetAllocationOrder(&aCount);
-  for (size_t i = 0; i < aCount; i++)
-  {
-    X64Reg xr = aOrder[i];
-    if (!xregs[xr].locked && xregs[xr].free)
-    {
-      return xr;
-    }
-  }
-
-  // Okay, not found; run the register allocator heuristic and figure out which register we should
-  // clobber.
-  float min_score = std::numeric_limits<float>::max();
-  X64Reg best_xreg = INVALID_REG;
-  size_t best_preg = 0;
-  for (size_t i = 0; i < aCount; i++)
-  {
-    X64Reg xreg = (X64Reg)aOrder[i];
-    size_t preg = xregs[xreg].ppcReg;
-    if (xregs[xreg].locked || regs[preg].locked)
-      continue;
-    float score = ScoreRegister(xreg);
-    if (score < min_score)
-    {
-      min_score = score;
-      best_xreg = xreg;
-      best_preg = preg;
-    }
-  }
-
-  if (best_xreg != INVALID_REG)
-  {
-    StoreFromRegister(best_preg);
-    return best_xreg;
-  }
-
-  // Still no dice? Die!
-  _assert_msg_(DYNA_REC, 0, "Regcache ran out of regs");
-  return INVALID_REG;
-}
+void RegCache::DiscardRegContentsIfCached(size_t preg)
+{
+  if (IsBound(preg))
+  {
+    X64Reg xr = regs[preg].location.GetSimpleReg();
+    xregs[xr].free = true;
+    xregs[xr].dirty = false;
+    xregs[xr].ppcReg = INVALID_REG;
+    regs[preg].away = false;
+    regs[preg].location = GetDefaultLocation(preg);
+  }
+}

 void RegCache::FlushR(X64Reg reg)
@@ -191,6 +79,35 @@ void RegCache::FlushR(X64Reg reg)
   }
 }

+void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
+{
+  for (size_t i = 0; i < xregs.size(); i++)
+  {
+    if (xregs[i].locked)
+      PanicAlert("Someone forgot to unlock X64 reg %zu", i);
+  }
+
+  for (unsigned int i : regsToFlush)
+  {
+    if (regs[i].locked)
+    {
+      PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
+    }
+
+    if (regs[i].away)
+    {
+      if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
+      {
+        StoreFromRegister(i, mode);
+      }
+      else
+      {
+        _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
+      }
+    }
+  }
+}
+
 int RegCache::SanityCheck() const
 {
   for (size_t i = 0; i < regs.size(); i++)
@@ -214,63 +131,6 @@ int RegCache::SanityCheck() const
   return 0;
 }

-void RegCache::DiscardRegContentsIfCached(size_t preg)
-{
-  if (IsBound(preg))
-  {
-    X64Reg xr = regs[preg].location.GetSimpleReg();
-    xregs[xr].free = true;
-    xregs[xr].dirty = false;
-    xregs[xr].ppcReg = INVALID_REG;
-    regs[preg].away = false;
-    regs[preg].location = GetDefaultLocation(preg);
-  }
-}
-
-void GPRRegCache::SetImmediate32(size_t preg, u32 immValue, bool dirty)
-{
-  // "dirty" can be false to avoid redundantly flushing an immediate when
-  // processing speculative constants.
-  DiscardRegContentsIfCached(preg);
-  regs[preg].away |= dirty;
-  regs[preg].location = Imm32(immValue);
-}
-
-const X64Reg* GPRRegCache::GetAllocationOrder(size_t* count)
-{
-  static const X64Reg allocationOrder[] = {
-// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into
-// this.
-#ifdef _WIN32
-      RSI, RDI, R13, R14, R15, R8,
-      R9, R10, R11, R12, RCX
-#else
-      R12, R13, R14, R15, RSI, RDI,
-      R8, R9, R10, R11, RCX
-#endif
-  };
-  *count = sizeof(allocationOrder) / sizeof(X64Reg);
-  return allocationOrder;
-}
-
-const X64Reg* FPURegCache::GetAllocationOrder(size_t* count)
-{
-  static const X64Reg allocationOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12,
-                                           XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
-  *count = sizeof(allocationOrder) / sizeof(X64Reg);
-  return allocationOrder;
-}
-
-OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
-{
-  return PPCSTATE(gpr[reg]);
-}
-
-OpArg FPURegCache::GetDefaultLocation(size_t reg) const
-{
-  return PPCSTATE(ps[reg][0]);
-}
-
 void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
 {
   if (regs[preg].away)
@@ -351,53 +211,60 @@ void RegCache::StoreFromRegister(size_t i, FlushMode mode)
   }
 }

-void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc)
-{
-  emit->MOV(32, ::Gen::R(newLoc), regs[preg].location);
-}
-
-void GPRRegCache::StoreRegister(size_t preg, const OpArg& newLoc)
-{
-  emit->MOV(32, newLoc, regs[preg].location);
-}
-
-void FPURegCache::LoadRegister(size_t preg, X64Reg newLoc)
-{
-  emit->MOVAPD(newLoc, regs[preg].location);
-}
-
-void FPURegCache::StoreRegister(size_t preg, const OpArg& newLoc)
-{
-  emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg());
-}
-
-void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
-{
-  for (size_t i = 0; i < xregs.size(); i++)
-  {
-    if (xregs[i].locked)
-      PanicAlert("Someone forgot to unlock X64 reg %zu", i);
-  }
-
-  for (unsigned int i : regsToFlush)
-  {
-    if (regs[i].locked)
-    {
-      PanicAlert("Someone forgot to unlock PPC reg %u (X64 reg %i).", i, RX(i));
-    }
-
-    if (regs[i].away)
-    {
-      if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())
-      {
-        StoreFromRegister(i, mode);
-      }
-      else
-      {
-        _assert_msg_(DYNA_REC, 0, "Jit64 - Flush unhandled case, reg %u PC: %08x", i, PC);
-      }
-    }
-  }
-}
+void RegCache::UnlockAll()
+{
+  for (auto& reg : regs)
+    reg.locked = false;
+}
+
+void RegCache::UnlockAllX()
+{
+  for (auto& xreg : xregs)
+    xreg.locked = false;
+}
+
+X64Reg RegCache::GetFreeXReg()
+{
+  size_t aCount;
+  const X64Reg* aOrder = GetAllocationOrder(&aCount);
+  for (size_t i = 0; i < aCount; i++)
+  {
+    X64Reg xr = aOrder[i];
+    if (!xregs[xr].locked && xregs[xr].free)
+    {
+      return xr;
+    }
+  }
+
+  // Okay, not found; run the register allocator heuristic and figure out which register we should
+  // clobber.
+  float min_score = std::numeric_limits<float>::max();
+  X64Reg best_xreg = INVALID_REG;
+  size_t best_preg = 0;
+  for (size_t i = 0; i < aCount; i++)
+  {
+    X64Reg xreg = (X64Reg)aOrder[i];
+    size_t preg = xregs[xreg].ppcReg;
+    if (xregs[xreg].locked || regs[preg].locked)
+      continue;
+    float score = ScoreRegister(xreg);
+    if (score < min_score)
+    {
+      min_score = score;
+      best_xreg = xreg;
+      best_preg = preg;
+    }
+  }
+
+  if (best_xreg != INVALID_REG)
+  {
+    StoreFromRegister(best_preg);
+    return best_xreg;
+  }
+
+  // Still no dice? Die!
+  _assert_msg_(DYNA_REC, 0, "Regcache ran out of regs");
+  return INVALID_REG;
+}

 int RegCache::NumFreeRegisters()
@@ -410,3 +277,35 @@ int RegCache::NumFreeRegisters()
       count++;
   return count;
 }
+
+// Estimate roughly how bad it would be to de-allocate this register. Higher score
+// means more bad.
+float RegCache::ScoreRegister(X64Reg xr)
+{
+  size_t preg = xregs[xr].ppcReg;
+  float score = 0;
+
+  // If it's not dirty, we don't need a store to write it back to the register file, so
+  // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
+  // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
+  // to the number of extra stores it causes.
+  if (xregs[xr].dirty)
+    score += 2;
+
+  // If the register isn't actually needed in a physical register for a later instruction,
+  // writing it back to the register file isn't quite as bad.
+  if (GetRegUtilization()[preg])
+  {
+    // Don't look too far ahead; we don't want to have quadratic compilation times for
+    // enormous block sizes!
+    // This actually improves register allocation a tiny bit; I'm not sure why.
+    u32 lookahead = std::min(jit->js.instructionsLeft, 64);
+    // Count how many other registers are going to be used before we need this one again.
+    u32 regs_in_count = CountRegsIn(preg, lookahead).Count();
+    // Totally ad-hoc heuristic to bias based on how many other registers we'll need
+    // before this one gets used again.
+    score += 1 + 2 * (5 - log2f(1 + (float)regs_in_count));
+  }
+
+  return score;
+}
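The eviction formula rewards distance: the more other registers the block touches before this one is read again, the lower (cheaper) the score. Plugging a few made-up inputs into the same arithmetic makes the shape concrete (standalone sketch, not Dolphin code):

#include <cmath>
#include <cstdio>

// Same arithmetic as RegCache::ScoreRegister, fed with hypothetical inputs.
// Higher score == more expensive to evict.
float Score(bool dirty, bool needed_later, unsigned regs_in_count)
{
  float score = 0;
  if (dirty)
    score += 2;  // eviction would cost an extra store back to the register file
  if (needed_later)
    score += 1 + 2 * (5 - std::log2(1.0f + regs_in_count));
  return score;
}

int main()
{
  std::printf("dirty, needed by the next op:        %.1f\n", Score(true, true, 0));   // 13.0
  std::printf("dirty, 7 other regs used first:      %.1f\n", Score(true, true, 7));   // 7.0
  std::printf("dirty, unused for the rest of block: %.1f\n", Score(true, false, 0));  // 2.0
  return 0;
}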

Source/Core/Core/PowerPC/Jit64/JitRegCache.h

@@ -153,26 +153,3 @@ public:
   Gen::X64Reg GetFreeXReg();
   int NumFreeRegisters();
 };
-
-class GPRRegCache final : public RegCache
-{
-public:
-  void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
-  void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
-  Gen::OpArg GetDefaultLocation(size_t reg) const override;
-  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
-  void SetImmediate32(size_t preg, u32 immValue, bool dirty = true);
-  BitSet32 GetRegUtilization() override;
-  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
-};
-
-class FPURegCache final : public RegCache
-{
-public:
-  void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
-  void LoadRegister(size_t preg, Gen::X64Reg newLoc) override;
-  const Gen::X64Reg* GetAllocationOrder(size_t* count) override;
-  Gen::OpArg GetDefaultLocation(size_t reg) const override;
-  BitSet32 GetRegUtilization() override;
-  BitSet32 CountRegsIn(size_t preg, u32 lookahead) override;
-};