mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-29 00:59:44 -06:00
JIT: fix handling of PC in dispatcher/block cache.
Specifically, don't make any assumptions about what effective addresses are used for code, and correctly handle changes to MSR.DR/MSR.IR. (Split off from dynamic-bat.)
This commit is contained in:
@ -55,6 +55,10 @@
|
||||
#define JITDISABLE(setting) \
|
||||
FALLBACK_IF(SConfig::GetInstance().bJITOff || SConfig::GetInstance().setting)
|
||||
|
||||
class JitBase;
|
||||
|
||||
extern JitBase* jit;
|
||||
|
||||
class JitBase : public CPUCoreBase
|
||||
{
|
||||
protected:
|
||||
@ -125,6 +129,7 @@ public:
|
||||
JitOptions jo;
|
||||
JitState js;
|
||||
|
||||
static const u8* Dispatch() { return jit->GetBlockCache()->Dispatch(); };
|
||||
virtual JitBaseBlockCache* GetBlockCache() = 0;
|
||||
|
||||
virtual void Jit(u32 em_address) = 0;
|
||||
@ -147,8 +152,6 @@ public:
|
||||
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
|
||||
};
|
||||
|
||||
extern JitBase* jit;
|
||||
|
||||
void Jit(u32 em_address);
|
||||
|
||||
// Merged routines that should be moved somewhere better
|
||||
|
@ -34,26 +34,15 @@ bool JitBaseBlockCache::IsFull() const
|
||||
|
||||
void JitBaseBlockCache::Init()
|
||||
{
|
||||
if (m_initialized)
|
||||
{
|
||||
PanicAlert("JitBaseBlockCache::Init() - iCache is already initialized");
|
||||
return;
|
||||
}
|
||||
|
||||
JitRegister::Init(SConfig::GetInstance().m_perfDir);
|
||||
|
||||
iCache.fill(JIT_ICACHE_INVALID_BYTE);
|
||||
iCacheEx.fill(JIT_ICACHE_INVALID_BYTE);
|
||||
iCacheVMEM.fill(JIT_ICACHE_INVALID_BYTE);
|
||||
iCache.fill(0);
|
||||
Clear();
|
||||
|
||||
m_initialized = true;
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::Shutdown()
|
||||
{
|
||||
num_blocks = 0;
|
||||
m_initialized = false;
|
||||
|
||||
JitRegister::Shutdown();
|
||||
}
|
||||
@ -80,7 +69,8 @@ void JitBaseBlockCache::Clear()
|
||||
valid_block.ClearAll();
|
||||
|
||||
num_blocks = 0;
|
||||
blockCodePointers.fill(nullptr);
|
||||
blocks[0].msrBits = 0xFFFFFFFF;
|
||||
blocks[0].invalid = true;
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::Reset()
|
||||
@ -103,7 +93,9 @@ int JitBaseBlockCache::AllocateBlock(u32 em_address)
|
||||
{
|
||||
JitBlock& b = blocks[num_blocks];
|
||||
b.invalid = false;
|
||||
b.originalAddress = em_address;
|
||||
b.effectiveAddress = em_address;
|
||||
b.physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address;
|
||||
b.msrBits = MSR & JitBlock::JIT_CACHE_MSR_MASK;
|
||||
b.linkData.clear();
|
||||
num_blocks++; // commit the current block
|
||||
return num_blocks - 1;
|
||||
@ -111,13 +103,23 @@ int JitBaseBlockCache::AllocateBlock(u32 em_address)
|
||||
|
||||
void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8* code_ptr)
|
||||
{
|
||||
blockCodePointers[block_num] = code_ptr;
|
||||
JitBlock& b = blocks[block_num];
|
||||
if (start_block_map.count(b.physicalAddress))
|
||||
{
|
||||
// We already have a block at this address; invalidate the old block.
|
||||
// This should be very rare. This will only happen if the same block
|
||||
// is called both with DR/IR enabled and with DR/IR disabled.
|
||||
WARN_LOG(DYNA_REC, "Invalidating compiled block at same address %08x", b.physicalAddress);
|
||||
int old_block_num = start_block_map[b.physicalAddress];
|
||||
const JitBlock& old_b = blocks[old_block_num];
|
||||
block_map.erase(
|
||||
std::make_pair(old_b.physicalAddress + 4 * old_b.originalSize - 1, old_b.physicalAddress));
|
||||
DestroyBlock(old_block_num, true);
|
||||
}
|
||||
start_block_map[b.physicalAddress] = block_num;
|
||||
FastLookupEntryForAddress(b.effectiveAddress) = block_num;
|
||||
|
||||
std::memcpy(GetICachePtr(b.originalAddress), &block_num, sizeof(u32));
|
||||
|
||||
// Convert the logical address to a physical address for the block map
|
||||
u32 pAddr = b.originalAddress & 0x1FFFFFFF;
|
||||
u32 pAddr = b.physicalAddress;
|
||||
|
||||
for (u32 block = pAddr / 32; block <= (pAddr + (b.originalSize - 1) * 4) / 32; ++block)
|
||||
valid_block.Set(block);
|
||||
@ -135,46 +137,62 @@ void JitBaseBlockCache::FinalizeBlock(int block_num, bool block_link, const u8*
|
||||
LinkBlockExits(block_num);
|
||||
}
|
||||
|
||||
JitRegister::Register(blockCodePointers[block_num], b.codeSize, "JIT_PPC_%08x",
|
||||
b.originalAddress);
|
||||
JitRegister::Register(b.checkedEntry, b.codeSize, "JIT_PPC_%08x", b.physicalAddress);
|
||||
}
|
||||
|
||||
const u8** JitBaseBlockCache::GetCodePointers()
|
||||
int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr, u32 msr)
|
||||
{
|
||||
return blockCodePointers.data();
|
||||
}
|
||||
u32 translated_addr = addr;
|
||||
if (UReg_MSR(msr).IR)
|
||||
{
|
||||
auto translated = PowerPC::JitCache_TranslateAddress(addr);
|
||||
if (!translated.valid)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
translated_addr = translated.address;
|
||||
}
|
||||
|
||||
u8* JitBaseBlockCache::GetICachePtr(u32 addr)
|
||||
{
|
||||
if (addr & JIT_ICACHE_VMEM_BIT)
|
||||
return &jit->GetBlockCache()->iCacheVMEM[addr & JIT_ICACHE_MASK];
|
||||
|
||||
if (addr & JIT_ICACHE_EXRAM_BIT)
|
||||
return &jit->GetBlockCache()->iCacheEx[addr & JIT_ICACHEEX_MASK];
|
||||
|
||||
return &jit->GetBlockCache()->iCache[addr & JIT_ICACHE_MASK];
|
||||
}
|
||||
|
||||
int JitBaseBlockCache::GetBlockNumberFromStartAddress(u32 addr)
|
||||
{
|
||||
u32 inst;
|
||||
std::memcpy(&inst, GetICachePtr(addr), sizeof(u32));
|
||||
|
||||
if (inst & 0xfc000000) // definitely not a JIT block
|
||||
auto map_result = start_block_map.find(translated_addr);
|
||||
if (map_result == start_block_map.end())
|
||||
return -1;
|
||||
|
||||
if ((int)inst >= num_blocks)
|
||||
int block_num = map_result->second;
|
||||
const JitBlock& b = blocks[block_num];
|
||||
if (b.invalid)
|
||||
return -1;
|
||||
|
||||
if (blocks[inst].originalAddress != addr)
|
||||
if (b.effectiveAddress != addr)
|
||||
return -1;
|
||||
|
||||
return inst;
|
||||
if (b.msrBits != (msr & JitBlock::JIT_CACHE_MSR_MASK))
|
||||
return -1;
|
||||
return block_num;
|
||||
}
|
||||
|
||||
CompiledCode JitBaseBlockCache::GetCompiledCodeFromBlock(int block_num)
|
||||
void JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
|
||||
{
|
||||
return (CompiledCode)blockCodePointers[block_num];
|
||||
int block_num = GetBlockNumberFromStartAddress(addr, msr);
|
||||
if (block_num < 0)
|
||||
{
|
||||
Jit(addr);
|
||||
}
|
||||
else
|
||||
{
|
||||
FastLookupEntryForAddress(addr) = block_num;
|
||||
LinkBlock(block_num);
|
||||
}
|
||||
}
|
||||
|
||||
// Reference dispatcher: returns the normalEntry pointer of the block that
// matches the current PC and the current MSR bits selected by
// JitBlock::JIT_CACHE_MSR_MASK.
const u8* JitBaseBlockCache::Dispatch()
|
||||
{
|
||||
// Start from whatever block number the fast lookup table holds for PC.
// The entry may belong to a different address or MSR state, so it is
// validated below before being used.
int block_num = FastLookupEntryForAddress(PC);
|
||||
|
||||
// Retry until the fast lookup entry describes a block whose effective
// address and MSR bits both match the current state.
while (blocks[block_num].effectiveAddress != PC ||
|
||||
blocks[block_num].msrBits != (MSR & JitBlock::JIT_CACHE_MSR_MASK))
|
||||
{
|
||||
// Either installs an existing matching block into the fast lookup
// table or calls Jit() for this address when none is found.
MoveBlockIntoFastCache(PC, MSR & JitBlock::JIT_CACHE_MSR_MASK);
|
||||
// Re-read the entry; PC is re-evaluated on each iteration.
block_num = FastLookupEntryForAddress(PC);
|
||||
}
|
||||
|
||||
return blocks[block_num].normalEntry;
|
||||
}
|
||||
|
||||
// Block linker
|
||||
@ -195,7 +213,7 @@ void JitBaseBlockCache::LinkBlockExits(int i)
|
||||
{
|
||||
if (!e.linkStatus)
|
||||
{
|
||||
int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress);
|
||||
int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress, b.msrBits);
|
||||
if (destinationBlock != -1)
|
||||
{
|
||||
WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]);
|
||||
@ -208,39 +226,35 @@ void JitBaseBlockCache::LinkBlockExits(int i)
|
||||
void JitBaseBlockCache::LinkBlock(int i)
|
||||
{
|
||||
LinkBlockExits(i);
|
||||
JitBlock& b = blocks[i];
|
||||
// equal_range(b) returns pair<iterator,iterator> representing the range
|
||||
// of element with key b
|
||||
auto ppp = links_to.equal_range(b.originalAddress);
|
||||
|
||||
if (ppp.first == ppp.second)
|
||||
return;
|
||||
const JitBlock& b = blocks[i];
|
||||
auto ppp = links_to.equal_range(b.effectiveAddress);
|
||||
|
||||
for (auto iter = ppp.first; iter != ppp.second; ++iter)
|
||||
{
|
||||
// PanicAlert("Linking block %i to block %i", iter->second, i);
|
||||
LinkBlockExits(iter->second);
|
||||
const JitBlock& b2 = blocks[iter->second];
|
||||
if (b.msrBits == b2.msrBits)
|
||||
LinkBlockExits(iter->second);
|
||||
}
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::UnlinkBlock(int i)
|
||||
{
|
||||
JitBlock& b = blocks[i];
|
||||
auto ppp = links_to.equal_range(b.originalAddress);
|
||||
|
||||
if (ppp.first == ppp.second)
|
||||
return;
|
||||
auto ppp = links_to.equal_range(b.effectiveAddress);
|
||||
|
||||
for (auto iter = ppp.first; iter != ppp.second; ++iter)
|
||||
{
|
||||
JitBlock& sourceBlock = blocks[iter->second];
|
||||
if (sourceBlock.msrBits != b.msrBits)
|
||||
continue;
|
||||
|
||||
for (auto& e : sourceBlock.linkData)
|
||||
{
|
||||
if (e.exitAddress == b.originalAddress)
|
||||
if (e.exitAddress == b.effectiveAddress)
|
||||
e.linkStatus = false;
|
||||
}
|
||||
}
|
||||
links_to.erase(b.originalAddress);
|
||||
links_to.erase(b.effectiveAddress);
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
|
||||
@ -258,20 +272,23 @@ void JitBaseBlockCache::DestroyBlock(int block_num, bool invalidate)
|
||||
return;
|
||||
}
|
||||
b.invalid = true;
|
||||
std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));
|
||||
start_block_map.erase(b.physicalAddress);
|
||||
FastLookupEntryForAddress(b.effectiveAddress) = 0;
|
||||
|
||||
UnlinkBlock(block_num);
|
||||
|
||||
// Send anyone who tries to run this block back to the dispatcher.
|
||||
// Not entirely ideal, but .. pretty good.
|
||||
// Spurious entrances from previously linked blocks can only come through checkedEntry
|
||||
WriteDestroyBlock(b.checkedEntry, b.originalAddress);
|
||||
WriteDestroyBlock(b.checkedEntry, b.effectiveAddress);
|
||||
}
|
||||
|
||||
void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced)
|
||||
{
|
||||
// Convert the logical address to a physical address for the block map
|
||||
u32 pAddr = address & 0x1FFFFFFF;
|
||||
auto translated = PowerPC::JitCache_TranslateAddress(address);
|
||||
if (!translated.valid)
|
||||
return;
|
||||
u32 pAddr = translated.address;
|
||||
|
||||
// Optimize the common case of length == 32 which is used by Interpreter::dcb*
|
||||
bool destroy_block = true;
|
||||
@ -288,20 +305,11 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for
|
||||
// address
|
||||
if (destroy_block)
|
||||
{
|
||||
std::map<std::pair<u32, u32>, u32>::iterator it1 = block_map.lower_bound(
|
||||
std::make_pair(pAddr, 0)),
|
||||
it2 = it1;
|
||||
while (it2 != block_map.end() && it2->first.second < pAddr + length)
|
||||
auto it = block_map.lower_bound(std::make_pair(pAddr, 0));
|
||||
while (it != block_map.end() && it->first.second < pAddr + length)
|
||||
{
|
||||
JitBlock& b = blocks[it2->second];
|
||||
std::memcpy(GetICachePtr(b.originalAddress), &JIT_ICACHE_INVALID_WORD, sizeof(u32));
|
||||
|
||||
DestroyBlock(it2->second, true);
|
||||
++it2;
|
||||
}
|
||||
if (it1 != it2)
|
||||
{
|
||||
block_map.erase(it1, it2);
|
||||
DestroyBlock(it->second, true);
|
||||
it = block_map.erase(it);
|
||||
}
|
||||
|
||||
// If the code was actually modified, we need to clear the relevant entries from the
|
||||
|
@ -12,32 +12,55 @@
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
static const u32 JIT_ICACHE_SIZE = 0x2000000;
|
||||
static const u32 JIT_ICACHE_MASK = 0x1ffffff;
|
||||
static const u32 JIT_ICACHEEX_SIZE = 0x4000000;
|
||||
static const u32 JIT_ICACHEEX_MASK = 0x3ffffff;
|
||||
static const u32 JIT_ICACHE_EXRAM_BIT = 0x10000000;
|
||||
static const u32 JIT_ICACHE_VMEM_BIT = 0x20000000;
|
||||
|
||||
// This corresponds to opcode 5 which is invalid in PowerPC
|
||||
static const u32 JIT_ICACHE_INVALID_BYTE = 0x80;
|
||||
static const u32 JIT_ICACHE_INVALID_WORD = 0x80808080;
|
||||
|
||||
// A JitBlock is a block of compiled code which corresponds to the PowerPC
|
||||
// code at a given address.
|
||||
//
|
||||
// The notion of the address of a block is a bit complicated because of the
|
||||
// way address translation works, but basically it's the combination of an
|
||||
// effective address, the address translation bits in MSR, and the physical
|
||||
// address.
|
||||
struct JitBlock
|
||||
{
|
||||
enum
|
||||
{
|
||||
// Mask for the MSR bits which determine whether a compiled block
|
||||
// is valid (MSR.IR and MSR.DR, the address translation bits).
|
||||
JIT_CACHE_MSR_MASK = 0x30,
|
||||
};
|
||||
|
||||
// A special entry point for block linking; usually used to check the
|
||||
// downcount.
|
||||
const u8* checkedEntry;
|
||||
// The normal entry point for the block, returned by Dispatch().
|
||||
const u8* normalEntry;
|
||||
|
||||
u32 originalAddress;
|
||||
// The effective address (PC) for the beginning of the block.
|
||||
u32 effectiveAddress;
|
||||
// The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK.
|
||||
u32 msrBits;
|
||||
// The physical address of the code represented by this block.
|
||||
// Various maps in the cache are indexed by this (start_block_map,
|
||||
// block_map, and valid_block in particular). This is useful because of
|
||||
// the way the instruction cache works on PowerPC.
|
||||
u32 physicalAddress;
|
||||
// The number of bytes of JIT'ed code contained in this block. Mostly
|
||||
// useful for logging.
|
||||
u32 codeSize;
|
||||
// The number of PPC instructions represented by this block. Mostly
|
||||
// useful for logging.
|
||||
u32 originalSize;
|
||||
int runCount; // for profiling.
|
||||
|
||||
// Whether this struct refers to a valid block. This is mostly useful as
|
||||
// a debugging aid.
|
||||
// FIXME: Change current users of invalid bit to assertions?
|
||||
bool invalid;
|
||||
|
||||
// Information about exits to a known address from this block.
|
||||
// This is used to implement block linking.
|
||||
struct LinkData
|
||||
{
|
||||
u8* exitPtrs; // to be able to rewrite the exit jum
|
||||
u8* exitPtrs; // to be able to rewrite the exit jump
|
||||
u32 exitAddress;
|
||||
bool linkStatus; // is it already linked?
|
||||
};
|
||||
@ -59,7 +82,12 @@ class ValidBlockBitSet final
|
||||
public:
|
||||
enum
|
||||
{
|
||||
VALID_BLOCK_MASK_SIZE = 0x20000000 / 32,
|
||||
// ValidBlockBitSet covers the whole 32-bit address-space in 32-byte
|
||||
// chunks.
|
||||
// FIXME: Maybe we can get away with less? There isn't any actual
|
||||
// RAM in most of this space.
|
||||
VALID_BLOCK_MASK_SIZE = (1ULL << 32) / 32,
|
||||
// The number of elements in the allocated array. Each u32 contains 32 bits.
|
||||
VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32
|
||||
};
|
||||
// Directly accessed by Jit64.
|
||||
@ -79,33 +107,53 @@ public:
|
||||
|
||||
class JitBaseBlockCache
|
||||
{
|
||||
enum
|
||||
{
|
||||
MAX_NUM_BLOCKS = 65536 * 2,
|
||||
};
|
||||
public:
|
||||
static constexpr int MAX_NUM_BLOCKS = 65536 * 2;
|
||||
static constexpr u32 iCache_Num_Elements = 0x10000;
|
||||
static constexpr u32 iCache_Mask = iCache_Num_Elements - 1;
|
||||
|
||||
std::array<const u8*, MAX_NUM_BLOCKS> blockCodePointers;
|
||||
std::array<JitBlock, MAX_NUM_BLOCKS> blocks;
|
||||
private:
|
||||
// We store the metadata of all blocks in a linear way within this array.
|
||||
std::array<JitBlock, MAX_NUM_BLOCKS> blocks; // number -> JitBlock
|
||||
int num_blocks;
|
||||
std::multimap<u32, int> links_to;
|
||||
|
||||
// links_to holds all exit points of all valid blocks in a reverse way.
|
||||
// It is used to query all blocks which link to an address.
|
||||
std::multimap<u32, int> links_to; // destination_PC -> number
|
||||
|
||||
// Map indexed by the physical memory location.
|
||||
// It is used to invalidate blocks based on memory location.
|
||||
std::map<std::pair<u32, u32>, u32> block_map; // (end_addr, start_addr) -> number
|
||||
|
||||
// Map indexed by the physical address of the entry point.
|
||||
// This is used to query the block based on the current PC in a slow way.
|
||||
// TODO: This is redundant with block_map, and both should be a multimap.
|
||||
std::map<u32, u32> start_block_map; // start_addr -> number
|
||||
|
||||
// This bitset shows which cachelines overlap with any blocks.
|
||||
// It is used to provide a fast way to query if no icache invalidation is needed.
|
||||
ValidBlockBitSet valid_block;
|
||||
|
||||
bool m_initialized;
|
||||
// This array is indexed with the masked PC and likely holds the correct block id.
|
||||
// This is used as a fast cache of start_block_map used in the assembly dispatcher.
|
||||
std::array<int, iCache_Num_Elements> iCache; // start_addr & mask -> number
|
||||
|
||||
void LinkBlockExits(int i);
|
||||
void LinkBlock(int i);
|
||||
void UnlinkBlock(int i);
|
||||
|
||||
u8* GetICachePtr(u32 addr);
|
||||
void DestroyBlock(int block_num, bool invalidate);
|
||||
|
||||
void MoveBlockIntoFastCache(u32 em_address, u32 msr);
|
||||
|
||||
// Fast but risky block lookup based on iCache.
|
||||
int& FastLookupEntryForAddress(u32 address) { return iCache[(address >> 2) & iCache_Mask]; }
|
||||
// Pure virtual; must be overridden by derived classes.
|
||||
virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0;
|
||||
virtual void WriteDestroyBlock(const u8* location, u32 address) = 0;
|
||||
|
||||
public:
|
||||
JitBaseBlockCache() : num_blocks(0), m_initialized(false) {}
|
||||
JitBaseBlockCache() : num_blocks(0) {}
|
||||
virtual ~JitBaseBlockCache() {}
|
||||
int AllocateBlock(u32 em_address);
|
||||
void FinalizeBlock(int block_num, bool block_link, const u8* code_ptr);
|
||||
@ -119,18 +167,20 @@ public:
|
||||
|
||||
// Code Cache
|
||||
JitBlock* GetBlock(int block_num);
|
||||
JitBlock* GetBlocks() { return blocks.data(); }
|
||||
int* GetICache() { return iCache.data(); }
|
||||
int GetNumBlocks() const;
|
||||
const u8** GetCodePointers();
|
||||
std::array<u8, JIT_ICACHE_SIZE> iCache;
|
||||
std::array<u8, JIT_ICACHEEX_SIZE> iCacheEx;
|
||||
std::array<u8, JIT_ICACHE_SIZE> iCacheVMEM;
|
||||
|
||||
// Fast way to get a block. Only works on the first ppc instruction of a block.
|
||||
int GetBlockNumberFromStartAddress(u32 em_address);
|
||||
// Look for the block in the slow but accurate way.
|
||||
// This function shall be used if FastLookupEntryForAddress() failed.
|
||||
int GetBlockNumberFromStartAddress(u32 em_address, u32 msr);
|
||||
|
||||
CompiledCode GetCompiledCodeFromBlock(int block_num);
|
||||
// Get the normal entry for the block associated with the current program
|
||||
// counter. This will JIT code if necessary. (This is the reference
|
||||
// implementation; high-performance JITs will want to use a custom
|
||||
// assembly version.)
|
||||
const u8* Dispatch();
|
||||
|
||||
// DOES NOT WORK CORRECTLY WITH INLINING
|
||||
void InvalidateICache(u32 address, const u32 length, bool forced);
|
||||
|
||||
u32* GetBlockBitSet() const { return valid_block.m_valid_block.get(); }
|
||||
|
Reference in New Issue
Block a user