Merge pull request #1804 from FioraAeterna/fastermmu2_master

MMU: various improvements, bugfixes, optimizations
Dolphin Bot
2015-01-07 00:49:58 +01:00
29 changed files with 746 additions and 812 deletions

View File

@@ -181,20 +181,26 @@ void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1)
 // Pass two registers as parameters.
 void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2)
 {
-    MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2);
+    MOVTwo(64, ABI_PARAM1, reg1, 0, ABI_PARAM2, reg2);
     ABI_CallFunction(func);
 }

-void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2)
+void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, s32 offset1, Gen::X64Reg dst2, Gen::X64Reg src2)
 {
     if (dst1 == src2 && dst2 == src1)
     {
         XCHG(bits, R(src1), R(src2));
+        if (offset1)
+            ADD(bits, R(dst1), Imm32(offset1));
     }
     else if (src2 != dst1)
     {
-        if (dst1 != src1)
+        if (dst1 != src1 && offset1)
+            LEA(bits, dst1, MDisp(src1, offset1));
+        else if (dst1 != src1)
             MOV(bits, R(dst1), R(src1));
+        else if (offset1)
+            ADD(bits, R(dst1), Imm32(offset1));
         if (dst2 != src2)
             MOV(bits, R(dst2), R(src2));
     }
@@ -202,8 +208,12 @@ void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg
     {
         if (dst2 != src2)
             MOV(bits, R(dst2), R(src2));
-        if (dst1 != src1)
+        if (dst1 != src1 && offset1)
+            LEA(bits, dst1, MDisp(src1, offset1));
+        else if (dst1 != src1)
             MOV(bits, R(dst1), R(src1));
+        else if (offset1)
+            ADD(bits, R(dst1), Imm32(offset1));
     }
 }
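
The new offset1 parameter lets a caller fold an address displacement into the two-register shuffle, usually saving an instruction at ABI call sites. A sketch of the intended effect (illustrative register names, not from this diff):

    // Previously, getting (addr_reg + 8) into ABI_PARAM1 took two instructions:
    //   MOV(64, R(ABI_PARAM1), R(addr_reg));
    //   ADD(64, R(ABI_PARAM1), Imm32(8));
    // With the extended MOVTwo this becomes one call:
    MOVTwo(64, ABI_PARAM1, addr_reg, 8, ABI_PARAM2, value_reg);
    // which emits LEA ABI_PARAM1, [addr_reg+8] / MOV ABI_PARAM2, value_reg when
    // the registers don't alias, and falls back to XCHG plus ADD when they do.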

View File

@@ -888,7 +888,7 @@ public:
     void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2);

     // Helper method for the above, or can be used separately.
-    void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2);
+    void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, s32 offset, Gen::X64Reg dst2, Gen::X64Reg src2);

     // Saves/restores the registers and adjusts the stack to be aligned as
     // required by the ABI, where the previous alignment was as specified.

View File

@@ -28,7 +28,7 @@ std::string PPCDebugInterface::Disassemble(unsigned int address)
     if (!Memory::IsRAMAddress(address, true, true))
     {
         if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU || !((address & JIT_ICACHE_VMEM_BIT) &&
-            Memory::TranslateAddress(address, Memory::FLAG_NO_EXCEPTION)))
+            Memory::TranslateAddress<Memory::FLAG_NO_EXCEPTION>(address)))
         {
             return "(No RAM here)";
         }

View File

@@ -188,9 +188,9 @@ bool AreMemoryBreakpointsActivated()
 #endif
 }

-u32 Read_Instruction(const u32 em_address)
+u32 Read_Instruction(const u32 address)
 {
-    UGeckoInstruction inst = ReadUnchecked_U32(em_address);
+    UGeckoInstruction inst = ReadUnchecked_U32(address);
     return inst.hex;
 }

@@ -235,48 +235,48 @@ void Memset(const u32 _Address, const u8 _iValue, const u32 _iLength)
     }
 }

-void ClearCacheLine(const u32 _Address)
+void ClearCacheLine(const u32 address)
 {
     // FIXME: does this do the right thing if dcbz is run on hardware memory, e.g.
     // the FIFO? Do games even do that? Probably not, but we should try to be correct...
     for (u32 i = 0; i < 32; i += 8)
-        Write_U64(0, _Address + i);
+        Write_U64(0, address + i);
 }

-void DMA_LCToMemory(const u32 _MemAddr, const u32 _CacheAddr, const u32 _iNumBlocks)
+void DMA_LCToMemory(const u32 memAddr, const u32 cacheAddr, const u32 numBlocks)
 {
-    const u8* src = m_pL1Cache + (_CacheAddr & 0x3FFFF);
-    u8* dst = GetPointer(_MemAddr);
+    const u8* src = m_pL1Cache + (cacheAddr & 0x3FFFF);
+    u8* dst = GetPointer(memAddr);

-    if ((dst != nullptr) && (src != nullptr) && (_MemAddr & 3) == 0 && (_CacheAddr & 3) == 0)
+    if ((dst != nullptr) && (src != nullptr) && (memAddr & 3) == 0 && (cacheAddr & 3) == 0)
     {
-        memcpy(dst, src, 32 * _iNumBlocks);
+        memcpy(dst, src, 32 * numBlocks);
     }
     else
     {
-        for (u32 i = 0; i < 32 * _iNumBlocks; i++)
+        for (u32 i = 0; i < 32 * numBlocks; i++)
         {
-            u8 Temp = Read_U8(_CacheAddr + i);
-            Write_U8(Temp, _MemAddr + i);
+            u8 Temp = Read_U8(cacheAddr + i);
+            Write_U8(Temp, memAddr + i);
         }
     }
 }

-void DMA_MemoryToLC(const u32 _CacheAddr, const u32 _MemAddr, const u32 _iNumBlocks)
+void DMA_MemoryToLC(const u32 cacheAddr, const u32 memAddr, const u32 numBlocks)
 {
-    const u8* src = GetPointer(_MemAddr);
-    u8* dst = m_pL1Cache + (_CacheAddr & 0x3FFFF);
+    const u8* src = GetPointer(memAddr);
+    u8* dst = m_pL1Cache + (cacheAddr & 0x3FFFF);

-    if ((dst != nullptr) && (src != nullptr) && (_MemAddr & 3) == 0 && (_CacheAddr & 3) == 0)
+    if ((dst != nullptr) && (src != nullptr) && (memAddr & 3) == 0 && (cacheAddr & 3) == 0)
     {
-        memcpy(dst, src, 32 * _iNumBlocks);
+        memcpy(dst, src, 32 * numBlocks);
     }
     else
     {
-        for (u32 i = 0; i < 32 * _iNumBlocks; i++)
+        for (u32 i = 0; i < 32 * numBlocks; i++)
         {
-            u8 Temp = Read_U8(_MemAddr + i);
-            Write_U8(Temp, _CacheAddr + i);
+            u8 Temp = Read_U8(memAddr + i);
+            Write_U8(Temp, cacheAddr + i);
         }
     }
 }
@@ -301,16 +301,16 @@ std::string GetString(u32 em_address, size_t size)
 // GetPointer must always return an address in the bottom 32 bits of address space, so that 64-bit
 // programs don't have problems directly addressing any part of memory.
 // TODO re-think with respect to other BAT setups...
-u8* GetPointer(const u32 _Address)
+u8* GetPointer(const u32 address)
 {
-    switch (_Address >> 28)
+    switch (address >> 28)
     {
     case 0x0:
     case 0x8:
-        if ((_Address & 0xfffffff) < REALRAM_SIZE)
-            return m_pRAM + (_Address & RAM_MASK);
+        if ((address & 0xfffffff) < REALRAM_SIZE)
+            return m_pRAM + (address & RAM_MASK);
     case 0xc:
-        switch (_Address >> 24)
+        switch (address >> 24)
         {
         case 0xcc:
         case 0xcd:
@@ -320,8 +320,8 @@ u8* GetPointer(const u32 _Address)
             break;
         default:
-            if ((_Address & 0xfffffff) < REALRAM_SIZE)
-                return m_pRAM + (_Address & RAM_MASK);
+            if ((address & 0xfffffff) < REALRAM_SIZE)
+                return m_pRAM + (address & RAM_MASK);
         }

     case 0x1:
@@ -329,53 +329,53 @@ u8* GetPointer(const u32 _Address)
     case 0xd:
         if (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii)
         {
-            if ((_Address & 0xfffffff) < EXRAM_SIZE)
-                return m_pEXRAM + (_Address & EXRAM_MASK);
+            if ((address & 0xfffffff) < EXRAM_SIZE)
+                return m_pEXRAM + (address & EXRAM_MASK);
         }
         else
             break;
     case 0xe:
-        if (_Address < (0xE0000000 + L1_CACHE_SIZE))
-            return m_pL1Cache + (_Address & L1_CACHE_MASK);
+        if (address < (0xE0000000 + L1_CACHE_SIZE))
+            return m_pL1Cache + (address & L1_CACHE_MASK);
         else
             break;
     default:
         if (bFakeVMEM)
-            return m_pFakeVMEM + (_Address & FAKEVMEM_MASK);
+            return m_pFakeVMEM + (address & FAKEVMEM_MASK);
     }

-    ERROR_LOG(MEMMAP, "Unknown Pointer %#8x PC %#8x LR %#8x", _Address, PC, LR);
+    ERROR_LOG(MEMMAP, "Unknown Pointer %#8x PC %#8x LR %#8x", address, PC, LR);
     return nullptr;
 }

-bool IsRAMAddress(const u32 addr, bool allow_locked_cache, bool allow_fake_vmem)
+bool IsRAMAddress(const u32 address, bool allow_locked_cache, bool allow_fake_vmem)
 {
-    switch ((addr >> 24) & 0xFC)
+    switch ((address >> 24) & 0xFC)
     {
     case 0x00:
     case 0x80:
     case 0xC0:
-        if ((addr & 0x1FFFFFFF) < RAM_SIZE)
+        if ((address & 0x1FFFFFFF) < RAM_SIZE)
             return true;
         else
             return false;
     case 0x10:
     case 0x90:
     case 0xD0:
-        if (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && (addr & 0x0FFFFFFF) < EXRAM_SIZE)
+        if (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && (address & 0x0FFFFFFF) < EXRAM_SIZE)
             return true;
         else
             return false;
     case 0xE0:
-        if (allow_locked_cache && addr - 0xE0000000 < L1_CACHE_SIZE)
+        if (allow_locked_cache && address - 0xE0000000 < L1_CACHE_SIZE)
             return true;
         else
             return false;
     case 0x7C:
-        if (allow_fake_vmem && bFakeVMEM && addr >= 0x7E000000)
+        if (allow_fake_vmem && bFakeVMEM && address >= 0x7E000000)
             return true;
         else
             return false;

View File

@@ -74,64 +74,57 @@ void Clear();
 bool AreMemoryBreakpointsActivated();

 // ONLY for use by GUI
-u8 ReadUnchecked_U8(const u32 _Address);
-u32 ReadUnchecked_U32(const u32 _Address);
+u8 ReadUnchecked_U8(const u32 address);
+u32 ReadUnchecked_U32(const u32 address);

-void WriteUnchecked_U8(const u8 _Data, const u32 _Address);
-void WriteUnchecked_U32(const u32 _Data, const u32 _Address);
+void WriteUnchecked_U8(const u8 var, const u32 address);
+void WriteUnchecked_U32(const u32 var, const u32 address);

-bool IsRAMAddress(const u32 addr, bool allow_locked_cache = false, bool allow_fake_vmem = false);
+bool IsRAMAddress(const u32 address, bool allow_locked_cache = false, bool allow_fake_vmem = false);

 // used by interpreter to read instructions, uses iCache
-u32 Read_Opcode(const u32 _Address);
+u32 Read_Opcode(const u32 address);
 // this is used by Debugger a lot.
 // For now, just reads from memory!
-u32 Read_Instruction(const u32 _Address);
+u32 Read_Instruction(const u32 address);

 // For use by emulator
-u8 Read_U8(const u32 _Address);
-u16 Read_U16(const u32 _Address);
-u32 Read_U32(const u32 _Address);
-u64 Read_U64(const u32 _Address);
-u32 Read_S8_Val(u32 address, u32 val);
-u32 Read_U8_Val(u32 address, u32 val);
-u32 Read_S16_Val(u32 address, u32 val);
-u32 Read_U16_Val(u32 address, u32 val);
-u32 Read_U32_Val(u32 address, u32 val);
-u64 Read_U64_Val(u32 address, u64 val);
+u8 Read_U8(const u32 address);
+u16 Read_U16(const u32 address);
+u32 Read_U32(const u32 address);
+u64 Read_U64(const u32 address);

 // Useful helper functions, used by ARM JIT
-float Read_F32(const u32 _Address);
-double Read_F64(const u32 _Address);
+float Read_F32(const u32 address);
+double Read_F64(const u32 address);

 // used by JIT. Return zero-extended 32bit values
-u32 Read_U8_ZX(const u32 _Address);
-u32 Read_U16_ZX(const u32 _Address);
+u32 Read_U8_ZX(const u32 address);
+u32 Read_U16_ZX(const u32 address);

-void Write_U8(const u8 _Data, const u32 _Address);
-void Write_U16(const u16 _Data, const u32 _Address);
-void Write_U32(const u32 _Data, const u32 _Address);
-void Write_U64(const u64 _Data, const u32 _Address);
+void Write_U8(const u8 var, const u32 address);
+void Write_U16(const u16 var, const u32 address);
+void Write_U32(const u32 var, const u32 address);
+void Write_U64(const u64 var, const u32 address);

-void Write_U16_Swap(const u16 _Data, const u32 _Address);
-void Write_U32_Swap(const u32 _Data, const u32 _Address);
-void Write_U64_Swap(const u64 _Data, const u32 _Address);
+void Write_U16_Swap(const u16 var, const u32 address);
+void Write_U32_Swap(const u32 var, const u32 address);
+void Write_U64_Swap(const u64 var, const u32 address);

 // Useful helper functions, used by ARM JIT
-void Write_F64(const double _Data, const u32 _Address);
+void Write_F64(const double var, const u32 address);

 std::string GetString(u32 em_address, size_t size = 0);

-u8* GetPointer(const u32 _Address);
-void DMA_LCToMemory(const u32 _iMemAddr, const u32 _iCacheAddr, const u32 _iNumBlocks);
-void DMA_MemoryToLC(const u32 _iCacheAddr, const u32 _iMemAddr, const u32 _iNumBlocks);
+u8* GetPointer(const u32 address);
+void DMA_LCToMemory(const u32 memAddr, const u32 cacheAddr, const u32 numBlocks);
+void DMA_MemoryToLC(const u32 cacheAddr, const u32 memAddr, const u32 numBlocks);
 void CopyFromEmu(void* data, u32 address, size_t size);
 void CopyToEmu(u32 address, const void* data, size_t size);
-void Memset(const u32 _Address, const u8 _Data, const u32 _iLength);
-void ClearCacheLine(const u32 _Address); // Zeroes 32 bytes; address should be 32-byte-aligned
+void Memset(const u32 address, const u8 var, const u32 length);
+void ClearCacheLine(const u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned

 // TLB functions
 void SDRUpdated();
@@ -142,8 +135,8 @@ enum XCheckTLBFlag
     FLAG_WRITE,
     FLAG_OPCODE,
 };
-u32 TranslateAddress(u32 _Address, XCheckTLBFlag _Flag);
-void InvalidateTLBEntry(u32 _Address);
+template <const XCheckTLBFlag flag> u32 TranslateAddress(const u32 address);
+void InvalidateTLBEntry(u32 address);
 extern u32 pagetable_base;
 extern u32 pagetable_hashmask;
 }
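
Turning the TLB flag into a template parameter moves the per-flag branching from run time to compile time: each instantiation specializes away checks such as the C-bit update, which only survives in the FLAG_WRITE variant. The call-site change, in brief:

    // u32 pa = TranslateAddress(ea, Memory::FLAG_READ);   // old: flag branched on at run time
    u32 pa = TranslateAddress<Memory::FLAG_READ>(ea);      // new: one specialized function per flag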

View File

@@ -16,6 +16,7 @@
 // https://github.com/dolphin-emu/dolphin

 #include "Common/Atomic.h"
+#include "Common/BitSet.h"
 #include "Common/CommonTypes.h"

 #include "Core/ConfigManager.h"
@@ -91,115 +92,90 @@ static u32 EFB_Read(const u32 addr)
 static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite);

-template <typename T, typename U>
-__forceinline void ReadFromHardware(U &_var, const u32 em_address, Memory::XCheckTLBFlag flag)
+template <XCheckTLBFlag flag, typename T>
+__forceinline T ReadFromHardware(const u32 em_address)
+{
+    int segment = em_address >> 28;
+    // Quick check for an address that can't meet any of the following conditions,
+    // to speed up the MMU path.
+    if (!BitSet32(0xCFC)[segment])
 {
     // TODO: Figure out the fastest order of tests for both read and write (they are probably different).
     if ((em_address & 0xC8000000) == 0xC8000000)
     {
         if (em_address < 0xcc000000)
-            _var = EFB_Read(em_address);
+            return EFB_Read(em_address);
         else
-            _var = (T)mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
+            return (T)mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
     }
-    else if (((em_address & 0xF0000000) == 0x80000000) ||
-        ((em_address & 0xF0000000) == 0xC0000000) ||
-        ((em_address & 0xF0000000) == 0x00000000))
+    else if (segment == 0x8 || segment == 0xC || segment == 0x0)
     {
-        _var = bswap((*(const T*)&m_pRAM[em_address & RAM_MASK]));
+        return bswap((*(const T*)&m_pRAM[em_address & RAM_MASK]));
     }
-    else if (m_pEXRAM && (((em_address & 0xF0000000) == 0x90000000) ||
-        ((em_address & 0xF0000000) == 0xD0000000) ||
-        ((em_address & 0xF0000000) == 0x10000000)))
+    else if (m_pEXRAM && (segment == 0x9 || segment == 0xD || segment == 0x1))
     {
-        _var = bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK]));
+        return bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK]));
     }
-    else if ((em_address >= 0xE0000000) && (em_address < (0xE0000000+L1_CACHE_SIZE)))
+    else if (segment == 0xE && (em_address < (0xE0000000 + L1_CACHE_SIZE)))
     {
-        _var = bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK]));
+        return bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK]));
     }
-    else if ((bFakeVMEM && ((em_address &0xF0000000) == 0x70000000)) ||
-        (bFakeVMEM && ((em_address &0xF0000000) == 0x40000000)))
+}
+
+    if (bFakeVMEM && (segment == 0x7 || segment == 0x4))
     {
         // fake VMEM
-        _var = bswap((*(const T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK]));
+        return bswap((*(const T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK]));
     }
-    else
+
+    // MMU: Do page table translation
+    u32 tlb_addr = TranslateAddress<flag>(em_address);
+    if (tlb_addr == 0)
     {
-        // MMU
+        if (flag == FLAG_READ)
+            GenerateDSIException(em_address, false);
+        return 0;
+    }
+
     // Handle loads that cross page boundaries (ewwww)
-    if (sizeof(T) > 1 && (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
+    // The alignment check isn't strictly necessary, but since this is a rare slow path, it provides a faster
+    // (1 instruction on x86) bailout.
+    if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) && (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
     {
-        _var = 0;
         // This could be unaligned down to the byte level... hopefully this is rare, so doing it this
         // way isn't too terrible.
         // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
         // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
-        u32 tlb_addr = TranslateAddress(em_address, flag);
+        u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
+        u32 tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
+        if (tlb_addr == 0 || tlb_addr_next_page == 0)
+        {
+            if (flag == FLAG_READ)
+                GenerateDSIException(em_address_next_page, false);
+            return 0;
+        }
+
+        T var = 0;
         for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++)
         {
-            // Start of the new page... translate the address again!
-            if (!(addr & (HW_PAGE_SIZE-1)))
-                tlb_addr = TranslateAddress(addr, flag);
-            // Important: we need to generate the DSI on the first store that caused the fault, NOT
-            // the address of the start of the load.
-            if (tlb_addr == 0)
-            {
-                if (flag == FLAG_READ)
-                {
-                    if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU)
-                        PanicAlertT("Invalid Read at 0x%08x, PC = 0x%08x ", em_address, PC);
-                    else
-                        GenerateDSIException(addr, false);
-                    break;
-                }
-            }
-            else
-            {
-                if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000)
-                {
-                    _var <<= 8;
-                    _var |= m_pEXRAM[tlb_addr & EXRAM_MASK];
-                }
-                else
-                {
-                    _var <<= 8;
-                    _var |= m_pRAM[tlb_addr & RAM_MASK];
-                }
-            }
-        }
-    }
-    else
-    {
-        u32 tlb_addr = TranslateAddress(em_address, flag);
-        if (tlb_addr == 0)
-        {
-            if (flag == FLAG_READ)
-            {
-                if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU)
-                    PanicAlertT("Invalid Read at 0x%08x, PC = 0x%08x ", em_address, PC);
-                else
-                    GenerateDSIException(em_address, false);
-            }
-        }
-        else
-        {
-            if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000)
-            {
-                _var = bswap((*(const T*)&m_pEXRAM[tlb_addr & EXRAM_MASK]));
-            }
-            else
-            {
-                _var = bswap((*(const T*)&m_pRAM[tlb_addr & RAM_MASK]));
-            }
-        }
+            if (addr == em_address_next_page)
+                tlb_addr = tlb_addr_next_page;
+            var = (var << 8) | Memory::base[tlb_addr];
+        }
+        return var;
     }
+
+    // The easy case!
+    return bswap(*(const T*)&Memory::base[tlb_addr]);
 }
-template <typename T>
-__forceinline void WriteToHardware(u32 em_address, const T data, Memory::XCheckTLBFlag flag)
+template <XCheckTLBFlag flag, typename T>
+__forceinline void WriteToHardware(u32 em_address, const T data)
+{
+    int segment = em_address >> 28;
+    // Quick check for an address that can't meet any of the following conditions,
+    // to speed up the MMU path.
+    if (!BitSet32(0xCFC)[segment])
 {
     // First, let's check for FIFO writes, since they are probably the most common
     // reason we end up in this function:
@@ -239,95 +215,64 @@ __forceinline void WriteToHardware(u32 em_address, const T data, Memory::XCheckT
             return;
         }
     }
-    else if (((em_address & 0xF0000000) == 0x80000000) ||
-        ((em_address & 0xF0000000) == 0xC0000000) ||
-        ((em_address & 0xF0000000) == 0x00000000))
+    else if (segment == 0x8 || segment == 0xC || segment == 0x0)
     {
         *(T*)&m_pRAM[em_address & RAM_MASK] = bswap(data);
         return;
     }
-    else if (m_pEXRAM && (((em_address & 0xF0000000) == 0x90000000) ||
-        ((em_address & 0xF0000000) == 0xD0000000) ||
-        ((em_address & 0xF0000000) == 0x10000000)))
+    else if (m_pEXRAM && (segment == 0x9 || segment == 0xD || segment == 0x1))
     {
         *(T*)&m_pEXRAM[em_address & EXRAM_MASK] = bswap(data);
         return;
     }
-    else if ((em_address >= 0xE0000000) && (em_address < (0xE0000000+L1_CACHE_SIZE)))
+    else if (segment == 0xE && (em_address < (0xE0000000 + L1_CACHE_SIZE)))
    {
         *(T*)&m_pL1Cache[em_address & L1_CACHE_MASK] = bswap(data);
         return;
     }
-    else if ((bFakeVMEM && ((em_address &0xF0000000) == 0x70000000)) ||
-        (bFakeVMEM && ((em_address &0xF0000000) == 0x40000000)))
+}
+
+    if (bFakeVMEM && (segment == 0x7 || segment == 0x4))
     {
         // fake VMEM
         *(T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK] = bswap(data);
+        return;
     }
-    else
+
+    // MMU: Do page table translation
+    u32 tlb_addr = TranslateAddress<flag>(em_address);
+    if (tlb_addr == 0)
     {
-        // MMU
+        if (flag == FLAG_WRITE)
+            GenerateDSIException(em_address, true);
+        return;
+    }
+
     // Handle stores that cross page boundaries (ewwww)
-    if (sizeof(T) > 1 && (em_address & (HW_PAGE_SIZE-1)) > HW_PAGE_SIZE - sizeof(T))
+    if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) && (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
     {
         T val = bswap(data);
-        u32 tlb_addr = TranslateAddress(em_address, flag);
-        for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++)
-        {
-            if (!(addr & (HW_PAGE_SIZE-1)))
-                tlb_addr = TranslateAddress(addr, flag);
-            if (tlb_addr == 0)
-            {
-                if (flag == FLAG_WRITE)
-                {
-                    if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU)
-                        PanicAlertT("Invalid Write to 0x%08x, PC = 0x%08x ", em_address, PC);
-                    else
-                        GenerateDSIException(addr, true);
-                    break;
-                }
-            }
-            else
-            {
-                if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000)
-                {
-                    m_pEXRAM[tlb_addr & EXRAM_MASK] = (u8)val;
-                    val >>= 8;
-                }
-                else
-                {
-                    m_pRAM[tlb_addr & RAM_MASK] = (u8)val;
-                    val >>= 8;
-                }
-            }
-        }
-    }
-    else
-    {
-        u32 tlb_addr = TranslateAddress(em_address, flag);
-        if (tlb_addr == 0)
-        {
-            if (flag == FLAG_WRITE)
-            {
-                if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU)
-                    PanicAlertT("Invalid Write to 0x%08x, PC = 0x%08x ", em_address, PC);
-                else
-                    GenerateDSIException(em_address, true);
-            }
-        }
-        else
-        {
-            if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000)
-            {
-                *(T*)&m_pEXRAM[tlb_addr & EXRAM_MASK] = bswap(data);
-            }
-            else
-            {
-                *(T*)&m_pRAM[tlb_addr & RAM_MASK] = bswap(data);
-            }
-        }
+
+        // We need to check both addresses before writing in case there's a DSI.
+        u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
+        u32 tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
+        if (tlb_addr_next_page == 0)
+        {
+            if (flag == FLAG_WRITE)
+                GenerateDSIException(em_address_next_page, true);
+            return;
+        }
+
+        for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++, val >>= 8)
+        {
+            if (addr == em_address_next_page)
+                tlb_addr = tlb_addr_next_page;
+            Memory::base[tlb_addr] = (u8)val;
+        }
+        return;
     }
+
+    // The easy case!
+    *(T*)&Memory::base[tlb_addr] = bswap(data);
 }

 // =====================
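
The BitSet32(0xCFC) guard added to both functions is a segment filter: 0xCFC has bits 2 through 7, 0xA and 0xB set, which are exactly the high nibbles ("segments") that the direct RAM/EXRAM/locked-cache/MMIO branches can never serve, so such addresses skip straight to the fake-VMEM and page-table paths. Spelled out (assuming BitSet32::operator[] tests bit n, as the usage implies):

    int segment = em_address >> 28;
    // !BitSet32(0xCFC)[segment] is equivalent to:
    bool maybe_direct = !((0xCFC >> segment) & 1);
    // true for segments 0,1,8,9,C,D,E,F: candidates for direct access.

Both MMU slow paths also now go through Memory::base, which this code treats as a flat view of emulated physical memory: a translated address indexes it directly, so the old hand-written choice between m_pRAM and m_pEXRAM disappears. That layout is assumed from the usage here rather than shown in this diff.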
@@ -339,9 +284,9 @@ __forceinline void WriteToHardware(u32 em_address, const T data, Memory::XCheckT
 static void GenerateISIException(u32 effective_address);

-u32 Read_Opcode(u32 _Address)
+u32 Read_Opcode(u32 address)
 {
-    if (_Address == 0x00000000)
+    if (address == 0x00000000)
     {
         // FIXME use assert?
         PanicAlert("Program tried to read an opcode from [00000000]. It has crashed.");
@@ -349,85 +294,65 @@ u32 Read_Opcode(u32 _Address)
     }

     if (SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU &&
-        (_Address & ADDR_MASK_MEM1))
+        (address & ADDR_MASK_MEM1))
     {
         // TODO: Check for MSR instruction address translation flag before translating
-        u32 tlb_addr = Memory::TranslateAddress(_Address, FLAG_OPCODE);
+        u32 tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
         if (tlb_addr == 0)
         {
-            GenerateISIException(_Address);
+            GenerateISIException(address);
             return 0;
         }
         else
         {
-            _Address = tlb_addr;
+            address = tlb_addr;
         }
     }

-    return PowerPC::ppcState.iCache.ReadInstruction(_Address);
+    return PowerPC::ppcState.iCache.ReadInstruction(address);
 }

-u8 Read_U8(const u32 _Address)
+static __forceinline void Memcheck(u32 address, u32 var, bool write, int size)
 {
-    u8 _var = 0;
-    ReadFromHardware<u8>(_var, _Address, FLAG_READ);
 #ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
+    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(address);
     if (mc)
     {
         mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, _var, _Address, false, 1, PC);
+        mc->Action(&PowerPC::debug_interface, var, address, write, size, PC);
     }
 #endif
-    return (u8)_var;
 }

-u16 Read_U16(const u32 _Address)
+u8 Read_U8(const u32 address)
 {
-    u16 _var = 0;
-    ReadFromHardware<u16>(_var, _Address, FLAG_READ);
-#ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
-    if (mc)
-    {
-        mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, _var, _Address, false, 2, PC);
-    }
-#endif
-    return (u16)_var;
+    u8 var = ReadFromHardware<FLAG_READ, u8>(address);
+    Memcheck(address, var, false, 1);
+    return (u8)var;
 }

-u32 Read_U32(const u32 _Address)
+u16 Read_U16(const u32 address)
 {
-    u32 _var = 0;
-    ReadFromHardware<u32>(_var, _Address, FLAG_READ);
-#ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
-    if (mc)
-    {
-        mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, _var, _Address, false, 4, PC);
-    }
-#endif
-    return _var;
+    u16 var = ReadFromHardware<FLAG_READ, u16>(address);
+    Memcheck(address, var, false, 2);
+    return (u16)var;
 }

-u64 Read_U64(const u32 _Address)
+u32 Read_U32(const u32 address)
 {
-    u64 _var = 0;
-    ReadFromHardware<u64>(_var, _Address, FLAG_READ);
-#ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
-    if (mc)
-    {
-        mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, (u32)_var, _Address, false, 8, PC);
-    }
-#endif
-    return _var;
+    u32 var = ReadFromHardware<FLAG_READ, u32>(address);
+    Memcheck(address, var, false, 4);
+    return var;
 }

-double Read_F64(const u32 _Address)
+u64 Read_U64(const u32 address)
+{
+    u64 var = ReadFromHardware<FLAG_READ, u64>(address);
+    Memcheck(address, (u32)var, false, 8);
+    return var;
+}
+
+double Read_F64(const u32 address)
 {
     union
     {
@@ -435,11 +360,11 @@ double Read_F64(const u32 _Address)
         double d;
     } cvt;

-    cvt.i = Read_U64(_Address);
+    cvt.i = Read_U64(address);
     return cvt.d;
 }

-float Read_F32(const u32 _Address)
+float Read_F32(const u32 address)
 {
     union
     {
@@ -447,158 +372,92 @@ float Read_F32(const u32 _Address)
         float d;
     } cvt;

-    cvt.i = Read_U32(_Address);
+    cvt.i = Read_U32(address);
     return cvt.d;
 }

-u32 Read_U8_Val(u32 address, u32 val)
+u32 Read_U8_ZX(const u32 address)
 {
-    ReadFromHardware<u8>(val, address, FLAG_READ);
-    return val;
+    return (u32)Read_U8(address);
 }

-u32 Read_S8_Val(u32 address, u32 val)
+u32 Read_U16_ZX(const u32 address)
 {
-    ReadFromHardware<s8>(val, address, FLAG_READ);
-    return val;
+    return (u32)Read_U16(address);
 }

-u32 Read_U16_Val(u32 address, u32 val)
+void Write_U8(const u8 var, const u32 address)
 {
-    ReadFromHardware<u16>(val, address, FLAG_READ);
-    return val;
+    Memcheck(address, var, true, 1);
+    WriteToHardware<FLAG_WRITE, u8>(address, var);
 }

-u32 Read_S16_Val(u32 address, u32 val)
+void Write_U16(const u16 var, const u32 address)
 {
-    ReadFromHardware<s16>(val, address, FLAG_READ);
-    return val;
+    Memcheck(address, var, true, 2);
+    WriteToHardware<FLAG_WRITE, u16>(address, var);
 }

-u32 Read_U32_Val(u32 address, u32 val)
+void Write_U16_Swap(const u16 var, const u32 address)
 {
-    ReadFromHardware<u32>(val, address, FLAG_READ);
-    return val;
-}
-
-u64 Read_U64_Val(u32 address, u64 val)
-{
-    ReadFromHardware<u64>(val, address, FLAG_READ);
-    return val;
-}
-
-u32 Read_U8_ZX(const u32 _Address)
-{
-    return (u32)Read_U8(_Address);
-}
-
-u32 Read_U16_ZX(const u32 _Address)
-{
-    return (u32)Read_U16(_Address);
-}
-
-void Write_U8(const u8 _Data, const u32 _Address)
-{
-#ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
-    if (mc)
-    {
-        mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, _Data,_Address,true,1,PC);
-    }
-#endif
-    WriteToHardware<u8>(_Address, _Data, FLAG_WRITE);
+    Memcheck(address, var, true, 2);
+    Write_U16(Common::swap16(var), address);
 }

-void Write_U16(const u16 _Data, const u32 _Address)
+void Write_U32(const u32 var, const u32 address)
 {
-#ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
-    if (mc)
-    {
-        mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, _Data,_Address,true,2,PC);
-    }
-#endif
-    WriteToHardware<u16>(_Address, _Data, FLAG_WRITE);
-}
-void Write_U16_Swap(const u16 _Data, const u32 _Address)
-{
-    Write_U16(Common::swap16(_Data), _Address);
+    Memcheck(address, var, true, 4);
+    WriteToHardware<FLAG_WRITE, u32>(address, var);
 }

-void Write_U32(const u32 _Data, const u32 _Address)
+void Write_U32_Swap(const u32 var, const u32 address)
 {
-#ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
-    if (mc)
-    {
-        mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, _Data,_Address,true,4,PC);
-    }
-#endif
-    WriteToHardware<u32>(_Address, _Data, FLAG_WRITE);
-}
-void Write_U32_Swap(const u32 _Data, const u32 _Address)
-{
-    Write_U32(Common::swap32(_Data), _Address);
+    Memcheck(address, var, true, 4);
+    Write_U32(Common::swap32(var), address);
 }

-void Write_U64(const u64 _Data, const u32 _Address)
+void Write_U64(const u64 var, const u32 address)
 {
-#ifdef ENABLE_MEM_CHECK
-    TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address);
-    if (mc)
-    {
-        mc->numHits++;
-        mc->Action(&PowerPC::debug_interface, (u32)_Data,_Address,true,8,PC);
-    }
-#endif
-    WriteToHardware<u64>(_Address, _Data, FLAG_WRITE);
-}
-void Write_U64_Swap(const u64 _Data, const u32 _Address)
-{
-    Write_U64(Common::swap64(_Data), _Address);
+    Memcheck(address, (u32)var, true, 8);
+    WriteToHardware<FLAG_WRITE, u64>(address, var);
 }

-void Write_F64(const double _Data, const u32 _Address)
+void Write_U64_Swap(const u64 var, const u32 address)
+{
+    Memcheck(address, (u32)var, true, 8);
+    Write_U64(Common::swap64(var), address);
+}
+
+void Write_F64(const double var, const u32 address)
 {
     union
     {
         u64 i;
         double d;
     } cvt;

-    cvt.d = _Data;
-    Write_U64(cvt.i, _Address);
+    cvt.d = var;
+    Write_U64(cvt.i, address);
 }

-u8 ReadUnchecked_U8(const u32 _Address)
+u8 ReadUnchecked_U8(const u32 address)
 {
-    u8 _var = 0;
-    ReadFromHardware<u8>(_var, _Address, FLAG_NO_EXCEPTION);
-    return _var;
+    u8 var = ReadFromHardware<FLAG_NO_EXCEPTION, u8>(address);
+    return var;
 }

-u32 ReadUnchecked_U32(const u32 _Address)
+u32 ReadUnchecked_U32(const u32 address)
 {
-    u32 _var = 0;
-    ReadFromHardware<u32>(_var, _Address, FLAG_NO_EXCEPTION);
-    return _var;
+    u32 var = ReadFromHardware<FLAG_NO_EXCEPTION, u32>(address);
+    return var;
 }

-void WriteUnchecked_U8(const u8 _iValue, const u32 _Address)
+void WriteUnchecked_U8(const u8 var, const u32 address)
 {
-    WriteToHardware<u8>(_Address, _iValue, FLAG_NO_EXCEPTION);
+    WriteToHardware<FLAG_NO_EXCEPTION, u8>(address, var);
 }

-void WriteUnchecked_U32(const u32 _iValue, const u32 _Address)
+void WriteUnchecked_U32(const u32 var, const u32 address)
 {
-    WriteToHardware<u32>(_Address, _iValue, FLAG_NO_EXCEPTION);
+    WriteToHardware<FLAG_NO_EXCEPTION, u32>(address, var);
 }

 // *********************************************************************************
@@ -694,14 +553,21 @@ union UPTE2
     u32 Hex;
 };

-static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite)
+static void GenerateDSIException(u32 effectiveAddress, bool write)
 {
-    if (_bWrite)
+    // DSI exceptions are only supported in MMU mode.
+    if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU)
+    {
+        PanicAlertT("Invalid %s 0x%08x, PC = 0x%08x ", write ? "write to" : "read from", effectiveAddress, PC);
+        return;
+    }
+
+    if (write)
         PowerPC::ppcState.spr[SPR_DSISR] = PPC_EXC_DSISR_PAGE | PPC_EXC_DSISR_STORE;
     else
         PowerPC::ppcState.spr[SPR_DSISR] = PPC_EXC_DSISR_PAGE;

-    PowerPC::ppcState.spr[SPR_DAR] = _EffectiveAddress;
+    PowerPC::ppcState.spr[SPR_DAR] = effectiveAddress;

     Common::AtomicOr(PowerPC::ppcState.Exceptions, EXCEPTION_DSI);
 }
@@ -741,111 +607,105 @@ void SDRUpdated()
     PowerPC::ppcState.pagetable_hashmask = ((xx<<10)|0x3ff);
 }

-static __forceinline u32 LookupTLBPageAddress(const XCheckTLBFlag _Flag, const u32 vpa, u32 *paddr)
+enum TLBLookupResult
 {
-    PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[_Flag == FLAG_OPCODE][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
-    if (tlbe[0].tag == (vpa & ~0xfff) && !(tlbe[0].flags & TLB_FLAG_INVALID))
+    TLB_FOUND,
+    TLB_NOTFOUND,
+    TLB_UPDATE_C
+};
+
+static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 vpa, u32 *paddr)
+{
+    int tag = vpa >> HW_PAGE_INDEX_SHIFT;
+    PowerPC::tlb_entry *tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK];
+    if (tlbe->tag[0] == tag)
     {
         // Check if C bit requires updating
-        if (_Flag == FLAG_WRITE)
+        if (flag == FLAG_WRITE)
         {
             UPTE2 PTE2;
-            PTE2.Hex = tlbe[0].pte;
+            PTE2.Hex = tlbe->pte[0];
             if (PTE2.C == 0)
             {
                 PTE2.C = 1;
-                tlbe[0].pte = PTE2.Hex;
-                return 0;
+                tlbe->pte[0] = PTE2.Hex;
+                return TLB_UPDATE_C;
             }
         }

-        if (_Flag != FLAG_NO_EXCEPTION)
-        {
-            tlbe[0].flags |= TLB_FLAG_MOST_RECENT;
-            tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT;
-        }
+        if (flag != FLAG_NO_EXCEPTION)
+            tlbe->recent = 0;

-        *paddr = tlbe[0].paddr | (vpa & 0xfff);
-        return 1;
+        *paddr = tlbe->paddr[0] | (vpa & 0xfff);
+        return TLB_FOUND;
     }
-    if (tlbe[1].tag == (vpa & ~0xfff) && !(tlbe[1].flags & TLB_FLAG_INVALID))
+    if (tlbe->tag[1] == tag)
     {
         // Check if C bit requires updating
-        if (_Flag == FLAG_WRITE)
+        if (flag == FLAG_WRITE)
         {
             UPTE2 PTE2;
-            PTE2.Hex = tlbe[1].pte;
+            PTE2.Hex = tlbe->pte[1];
             if (PTE2.C == 0)
             {
                 PTE2.C = 1;
-                tlbe[1].pte = PTE2.Hex;
-                return 0;
+                tlbe->pte[1] = PTE2.Hex;
+                return TLB_UPDATE_C;
             }
         }

-        if (_Flag != FLAG_NO_EXCEPTION)
-        {
-            tlbe[1].flags |= TLB_FLAG_MOST_RECENT;
-            tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT;
-        }
+        if (flag != FLAG_NO_EXCEPTION)
+            tlbe->recent = 1;

-        *paddr = tlbe[1].paddr | (vpa & 0xfff);
-        return 1;
+        *paddr = tlbe->paddr[1] | (vpa & 0xfff);
+        return TLB_FOUND;
     }
-    return 0;
+    return TLB_NOTFOUND;
 }

-static __forceinline void UpdateTLBEntry(const XCheckTLBFlag _Flag, UPTE2 PTE2, const u32 vpa)
+static __forceinline void UpdateTLBEntry(const XCheckTLBFlag flag, UPTE2 PTE2, const u32 address)
 {
-    if (_Flag == FLAG_NO_EXCEPTION)
+    if (flag == FLAG_NO_EXCEPTION)
         return;

-    PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[_Flag == FLAG_OPCODE][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
-    if ((tlbe[0].flags & TLB_FLAG_MOST_RECENT) == 0 || (tlbe[0].flags & TLB_FLAG_INVALID))
-    {
-        tlbe[0].flags = TLB_FLAG_MOST_RECENT;
-        tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT;
-        tlbe[0].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
-        tlbe[0].pte = PTE2.Hex;
-        tlbe[0].tag = vpa & ~0xfff;
-    }
-    else
-    {
-        tlbe[1].flags = TLB_FLAG_MOST_RECENT;
-        tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT;
-        tlbe[1].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
-        tlbe[1].pte = PTE2.Hex;
-        tlbe[1].tag = vpa & ~0xfff;
-    }
+    int tag = address >> HW_PAGE_INDEX_SHIFT;
+    PowerPC::tlb_entry *tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK];
+    int index = tlbe->recent == 0 && tlbe->tag[0] != TLB_TAG_INVALID;
+    tlbe->recent = index;
+    tlbe->paddr[index] = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
+    tlbe->pte[index] = PTE2.Hex;
+    tlbe->tag[index] = tag;
 }

-void InvalidateTLBEntry(u32 vpa)
+void InvalidateTLBEntry(u32 address)
 {
-    PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[0][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
-    tlbe[0].flags |= TLB_FLAG_INVALID;
-    tlbe[1].flags |= TLB_FLAG_INVALID;
-    PowerPC::tlb_entry *tlbe_i = PowerPC::ppcState.tlb[1][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
-    tlbe_i[0].flags |= TLB_FLAG_INVALID;
-    tlbe_i[1].flags |= TLB_FLAG_INVALID;
+    PowerPC::tlb_entry *tlbe = &PowerPC::ppcState.tlb[0][(address >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
+    tlbe->tag[0] = TLB_TAG_INVALID;
+    tlbe->tag[1] = TLB_TAG_INVALID;
+    PowerPC::tlb_entry *tlbe_i = &PowerPC::ppcState.tlb[1][(address >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
+    tlbe_i->tag[0] = TLB_TAG_INVALID;
+    tlbe_i->tag[1] = TLB_TAG_INVALID;
 }

 // Page Address Translation
-static __forceinline u32 TranslatePageAddress(const u32 _Address, const XCheckTLBFlag _Flag)
+static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLBFlag flag)
 {
     // TLB cache
+    // This catches 99%+ of lookups in practice, so the actual page table entry code below doesn't benefit
+    // much from optimization.
     u32 translatedAddress = 0;
-    if (LookupTLBPageAddress(_Flag, _Address, &translatedAddress))
+    TLBLookupResult res = LookupTLBPageAddress(flag, address, &translatedAddress);
+    if (res == TLB_FOUND)
         return translatedAddress;

-    u32 sr = PowerPC::ppcState.sr[EA_SR(_Address)];
+    u32 sr = PowerPC::ppcState.sr[EA_SR(address)];

-    u32 offset = EA_Offset(_Address);        // 12 bit
-    u32 page_index = EA_PageIndex(_Address); // 16 bit
+    u32 offset = EA_Offset(address);        // 12 bit
+    u32 page_index = EA_PageIndex(address); // 16 bit
     u32 VSID = SR_VSID(sr);                 // 24 bit
-    u32 api = EA_API(_Address);              // 6 bit (part of page_index)
+    u32 api = EA_API(address);              // 6 bit (part of page_index)

     // Direct access to the fastmem Arena
     // FIXME: is this the best idea for clean code?
@@ -853,37 +713,28 @@ static __forceinline u32 TranslatePageAddress(const u32 _Address, const XCheckTL

     // hash function no 1 "xor" .360
     u32 hash = (VSID ^ page_index);
+    u32 pte1 = bswap((VSID << 7) | api | PTE1_V);

     for (int hash_func = 0; hash_func < 2; hash_func++)
     {
-        // hash function no 2 "not" .360
         if (hash_func == 1)
         {
+            // hash function no 2 "not" .360
             hash = ~hash;
+            pte1 |= PTE1_H << 24;
         }

         u32 pteg_addr = ((hash & PowerPC::ppcState.pagetable_hashmask) << 6) | PowerPC::ppcState.pagetable_base;
-        if ((pteg_addr >> 28) == 1)
-            base_mem = Memory::m_pEXRAM;

-        for (int i = 0; i < 8; i++)
+        for (int i = 0; i < 8; i++, pteg_addr += 8)
         {
-            u32 pte = bswap(*(u32*)&base_mem[pteg_addr]);
-            bool pteh = (pte & PTE1_H) == 0;
-            if (hash_func == 1)
-                pteh = !pteh;
-            if ((pte & PTE1_V) && pteh)
+            if (pte1 == *(u32*)&base_mem[pteg_addr])
             {
-                if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte)))
-                {
                 UPTE2 PTE2;
                 PTE2.Hex = bswap((*(u32*)&base_mem[(pteg_addr + 4)]));

                 // set the access bits
-                switch (_Flag)
+                switch (flag)
                 {
                 case FLAG_NO_EXCEPTION: break;
                 case FLAG_READ:     PTE2.R = 1; break;
@@ -891,16 +742,16 @@ static __forceinline u32 TranslatePageAddress(const u32 _Address, const XCheckTL
                 case FLAG_OPCODE:   PTE2.R = 1; break;
                 }

-                if (_Flag != FLAG_NO_EXCEPTION)
+                if (flag != FLAG_NO_EXCEPTION)
                     *(u32*)&base_mem[(pteg_addr + 4)] = bswap(PTE2.Hex);

-                UpdateTLBEntry(_Flag, PTE2, _Address);
+                // We already updated the TLB entry if this was caused by a C bit.
+                if (res != TLB_UPDATE_C)
+                    UpdateTLBEntry(flag, PTE2, address);

                 return (PTE2.RPN << 12) | offset;
-                }
             }
-            pteg_addr += 8;
         }
     }
     return 0;
 }
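
Two structural changes carry these hunks. The TLB entry now stores its two ways as arrays, with tag[way] == TLB_TAG_INVALID replacing the old TLB_FLAG_INVALID and recent replacing TLB_FLAG_MOST_RECENT; the layout implied by the accesses above is roughly (a sketch, the real definition lives with the PowerPC state):

    struct tlb_entry
    {
        u32 tag[2];    // page tag per way, or TLB_TAG_INVALID
        u32 paddr[2];  // translated physical page base per way
        u32 pte[2];    // cached PTE2 per way, so the C (changed) bit can be set lazily
        u32 recent;    // most recently used way; UpdateTLBEntry refills the other one
    };

Second, the page-table walk now precomputes the one big-endian PTE1 word a matching entry must contain (valid bit, VSID and API, with the H bit folded in for the secondary hash) and does a single 32-bit compare per slot, instead of byte-swapping every candidate and testing V, H, VSID and API separately.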
@@ -942,7 +793,7 @@ static inline bool CheckAddrBats(const u32 addr, u32* result, u32 batu, u32 spr)
 }

 // Block Address Translation
-static u32 TranslateBlockAddress(const u32 addr, const XCheckTLBFlag _Flag)
+static u32 TranslateBlockAddress(const u32 address, const XCheckTLBFlag flag)
 {
     u32 result = 0;
     UReg_MSR& m_MSR = ((UReg_MSR&)PowerPC::ppcState.msr);
@@ -951,21 +802,22 @@ static u32 TranslateBlockAddress(const u32 addr, const XCheckTLBFlag _Flag)

     // Check for enhanced mode (secondary BAT enable) using 8 BATs
     bool enhanced_bats = SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && HID4.SBE;

-    if (_Flag != FLAG_OPCODE)
+    if (flag != FLAG_OPCODE)
     {
-        if (!CheckAddrBats(addr, &result, batu, SPR_DBAT0U) && enhanced_bats)
-            CheckAddrBats(addr, &result, batu, SPR_DBAT4U);
+        if (!CheckAddrBats(address, &result, batu, SPR_DBAT0U) && enhanced_bats)
+            CheckAddrBats(address, &result, batu, SPR_DBAT4U);
     }
     else
     {
-        if (!CheckAddrBats(addr, &result, batu, SPR_IBAT0U) && enhanced_bats)
-            CheckAddrBats(addr, &result, batu, SPR_IBAT4U);
+        if (!CheckAddrBats(address, &result, batu, SPR_IBAT0U) && enhanced_bats)
+            CheckAddrBats(address, &result, batu, SPR_IBAT4U);
     }
     return result;
 }

 // Translate effective address using BAT or PAT.  Returns 0 if the address cannot be translated.
-u32 TranslateAddress(const u32 _Address, const XCheckTLBFlag _Flag)
+template <const XCheckTLBFlag flag>
+u32 TranslateAddress(const u32 address)
 {
     // Check MSR[IR] bit before translating instruction addresses. Rogue Leader clears IR and DR??
     //if ((_Flag == FLAG_OPCODE) && !(MSR & (1 << (31 - 26)))) return _Address;
@@ -977,10 +829,15 @@ u32 TranslateAddress(const u32 _Address, const XCheckTLBFlag _Flag)
     // so only do it where it's really needed.
     if (SConfig::GetInstance().m_LocalCoreStartupParameter.bBAT)
     {
-        u32 tlb_addr = TranslateBlockAddress(_Address, _Flag);
+        u32 tlb_addr = TranslateBlockAddress(address, flag);
         if (tlb_addr)
             return tlb_addr;
     }

-    return TranslatePageAddress(_Address, _Flag);
+    return TranslatePageAddress(address, flag);
 }
+
+template u32 TranslateAddress<Memory::FLAG_NO_EXCEPTION>(const u32 address);
+template u32 TranslateAddress<Memory::FLAG_READ>(const u32 address);
+template u32 TranslateAddress<Memory::FLAG_WRITE>(const u32 address);
+template u32 TranslateAddress<Memory::FLAG_OPCODE>(const u32 address);

 }  // namespace
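
Because TranslateAddress<flag> is now a template defined inside this .cpp file, the four explicit instantiations above are what other translation units (the debugger interface, the JITs) link against. The pattern in miniature, with hypothetical names:

    // scale.h
    template <int N> int Scaled(int x);
    // scale.cpp
    template <int N> int Scaled(int x) { return x * N; }
    template int Scaled<2>(int x);   // emit the N=2 definition into this object file
    // elsewhere.cpp
    int y = Scaled<2>(21);           // links against the explicit instantiation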

View File

@@ -178,11 +178,12 @@ void Jit64::Init()
     jo.optimizeGatherPipe = true;
     jo.accurateSinglePrecision = true;
     js.memcheck = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU;
+    js.fastmemLoadStore = NULL;

     gpr.SetEmitter(this);
     fpr.SetEmitter(this);

-    trampolines.Init();
+    trampolines.Init(js.memcheck ? TRAMPOLINE_CODE_SIZE_MMU : TRAMPOLINE_CODE_SIZE);
     AllocCodeSpace(CODE_SIZE);

     // BLR optimization has the same consequences as block linking, as well as
@@ -493,6 +494,7 @@ void Jit64::Jit(u32 em_address)
 {
     if (GetSpaceLeft() < 0x10000 ||
         farcode.GetSpaceLeft() < 0x10000 ||
+        trampolines.GetSpaceLeft() < 0x10000 ||
         blocks.IsFull() ||
         SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache ||
         m_clear_cache_asap)
@@ -612,6 +614,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
         js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
         const GekkoOPInfo *opinfo = ops[i].opinfo;
         js.downcountAmount += opinfo->numCycles;
+        js.fastmemLoadStore = NULL;
+        js.fixupExceptionHandler = false;
+        js.revertGprLoad = -1;
+        js.revertFprLoad = -1;

         if (i == (code_block.m_num_instructions - 1))
         {
@@ -761,22 +767,37 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc

             Jit64Tables::CompileInstruction(ops[i]);

-            // If we have a register that will never be used again, flush it.
-            for (int j : ~ops[i].gprInUse)
-                gpr.StoreFromRegister(j);
-            for (int j : ~ops[i].fprInUse)
-                fpr.StoreFromRegister(j);
-
             if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
             {
+                // If we have a fastmem loadstore, we can omit the exception check and let fastmem handle it.
+                FixupBranch memException;
+                _assert_msg_(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler),
+                    "Fastmem loadstores shouldn't have exception handler fixups (PC=%x)!", ops[i].address);
+
+                if (!js.fastmemLoadStore && !js.fixupExceptionHandler)
+                {
                     TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
-                FixupBranch memException = J_CC(CC_NZ, true);
+                    memException = J_CC(CC_NZ, true);
+                }

                 SwitchToFarCode();
-                SetJumpTarget(memException);
+                if (!js.fastmemLoadStore)
+                {
+                    exceptionHandlerAtLoc[js.fastmemLoadStore] = NULL;
+                    SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException);
+                }
+                else
+                {
+                    exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr();
+                }

-                gpr.Flush(FLUSH_MAINTAIN_STATE);
-                fpr.Flush(FLUSH_MAINTAIN_STATE);
+                BitSet32 gprToFlush = BitSet32::AllTrue(32);
+                BitSet32 fprToFlush = BitSet32::AllTrue(32);
+                if (js.revertGprLoad >= 0)
+                    gprToFlush[js.revertGprLoad] = false;
+                if (js.revertFprLoad >= 0)
+                    fprToFlush[js.revertFprLoad] = false;
+                gpr.Flush(FLUSH_MAINTAIN_STATE, gprToFlush);
+                fpr.Flush(FLUSH_MAINTAIN_STATE, fprToFlush);

                 // If a memory exception occurs, the exception handler will read
                 // from PC.  Update PC with the latest value in case that happens.
@@ -785,6 +806,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
                 SwitchToNearCode();
             }

+            // If we have a register that will never be used again, flush it.
+            for (int j : ~ops[i].gprInUse)
+                gpr.StoreFromRegister(j);
+            for (int j : ~ops[i].fprInUse)
+                fpr.StoreFromRegister(j);
+
             if (opinfo->flags & FL_LOADSTORE)
                 ++jit->js.numLoadStoreInst;

View File

@@ -133,6 +133,7 @@ public:
     // Clobbers RDX.
     void SetCRFieldBit(int field, int bit, Gen::X64Reg in);
     void ClearCRFieldBit(int field, int bit);
+    void SetCRFieldBit(int field, int bit);

     // Generates a branch that will check if a given bit of a CR register part
     // is set or not.

View File

@@ -226,6 +226,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
     GenFrsqrte();
     fres = AlignCode4();
     GenFres();
+    mfcr = AlignCode4();
+    GenMfcr();

     GenQuantizedLoads();
     GenQuantizedStores();

View File

@@ -401,7 +401,7 @@ void FPURegCache::StoreRegister(size_t preg, OpArg newLoc)
     emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg());
 }

-void RegCache::Flush(FlushMode mode)
+void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush)
 {
     for (unsigned int i = 0; i < xregs.size(); i++)
     {
@@ -409,7 +409,7 @@ void RegCache::Flush(FlushMode mode)
             PanicAlert("Someone forgot to unlock X64 reg %u", i);
     }

-    for (unsigned int i = 0; i < regs.size(); i++)
+    for (unsigned int i : regsToFlush)
     {
         if (regs[i].locked)
         {

View File

@@ -81,7 +81,7 @@ public:
         LockX(reg1); LockX(reg2);
     }

-    void Flush(FlushMode mode = FLUSH_ALL);
+    void Flush(FlushMode mode = FLUSH_ALL, BitSet32 regsToFlush = BitSet32::AllTrue(32));
     void Flush(PPCAnalyst::CodeOp *op) {Flush();}
     int SanityCheck() const;
     void KillImmediate(size_t preg, bool doLoad, bool makeDirty);
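
Flush's new BitSet32 parameter is what lets the memory-exception path keep a reverted load's target register out of the flush. BitSet32 (from the newly included Common/BitSet.h) supports indexing and iteration over set bits, so the caller builds a mask and Flush visits only those registers; as used in Jit.cpp above:

    BitSet32 gprToFlush = BitSet32::AllTrue(32);   // default: flush all 32 GPRs
    if (js.revertGprLoad >= 0)
        gprToFlush[js.revertGprLoad] = false;      // ...except the clobbered load target
    gpr.Flush(FLUSH_MAINTAIN_STATE, gprToFlush);
    // inside Flush: for (unsigned int i : regsToFlush) visits each set bit index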

View File

@@ -246,29 +246,41 @@ void Jit64::lXXx(UGeckoInstruction inst)
     }

     gpr.Lock(a, b, d);
-    gpr.BindToRegister(d, js.memcheck, true);
-    BitSet32 registersInUse = CallerSavedRegistersInUse();
     if (update && storeAddress)
+        gpr.BindToRegister(a, true, true);
+
+    // A bit of an evil hack here. We need to retain the original value of this register for the
+    // exception path, but we'd rather not needlessly pass it around if we don't have to, since
+    // the exception path is very rare. So we store the value in the regcache, let the load path
+    // clobber it, then restore the value in the exception path.
+    // TODO: no other load has to do this at the moment, since no other loads go directly to the
+    // target registers, but if that ever changes, we need to do it there too.
+    if (js.memcheck)
     {
-        // We need to save the (usually scratch) address register for the update.
-        registersInUse[RSCRATCH2] = true;
+        gpr.StoreFromRegister(d);
+        js.revertGprLoad = d;
     }
+    gpr.BindToRegister(d, false, true);
+
+    BitSet32 registersInUse = CallerSavedRegistersInUse();
+    // We need to save the (usually scratch) address register for the update.
+    if (update && storeAddress)
+        registersInUse[RSCRATCH2] = true;

     SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);

     if (update && storeAddress)
     {
-        gpr.BindToRegister(a, true, true);
-        MEMCHECK_START(false)
+        MemoryExceptionCheck();
         MOV(32, gpr.R(a), opAddress);
-        MEMCHECK_END
     }

     // TODO: support no-swap in SafeLoadToReg instead
     if (byte_reversed)
     {
-        MEMCHECK_START(false)
+        MemoryExceptionCheck();
         BSWAP(accessSize, gpr.RX(d));
-        MEMCHECK_END
     }

     gpr.UnlockAll();
@@ -372,9 +384,8 @@ void Jit64::stX(UGeckoInstruction inst)
         else
         {
             gpr.KillImmediate(a, true, true);
-            MEMCHECK_START(false)
+            MemoryExceptionCheck();
             ADD(32, gpr.R(a), Imm32((u32)offset));
-            MEMCHECK_END
         }
     }
 }
@@ -404,9 +415,8 @@ void Jit64::stX(UGeckoInstruction inst)
         if (update)
         {
-            MEMCHECK_START(false)
+            MemoryExceptionCheck();
             ADD(32, gpr.R(a), Imm32((u32)offset));
-            MEMCHECK_END
         }
     }
     gpr.UnlockAll();
@@ -425,12 +435,9 @@ void Jit64::stXx(UGeckoInstruction inst)
     gpr.Lock(a, b, s);

     if (update)
-    {
         gpr.BindToRegister(a, true, true);
-        ADD(32, gpr.R(a), gpr.R(b));
-        MOV(32, R(RSCRATCH2), gpr.R(a));
-    }
-    else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
+
+    if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
     {
         LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
     }
@@ -462,7 +469,10 @@ void Jit64::stXx(UGeckoInstruction inst)

     if (gpr.R(s).IsImm())
     {
-        SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
+        BitSet32 registersInUse = CallerSavedRegistersInUse();
+        if (update)
+            registersInUse[RSCRATCH2] = true;
+        SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, registersInUse, byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
     }
     else
     {
@@ -477,15 +487,16 @@ void Jit64::stXx(UGeckoInstruction inst)
             gpr.BindToRegister(s, true, false);
             reg_value = gpr.RX(s);
         }
-        SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
+        BitSet32 registersInUse = CallerSavedRegistersInUse();
+        if (update)
+            registersInUse[RSCRATCH2] = true;
+        SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, registersInUse, byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
     }

-    if (update && js.memcheck)
+    if (update)
     {
-        // revert the address change if an exception occurred
-        MEMCHECK_START(true)
-        SUB(32, gpr.R(a), gpr.R(b));
-        MEMCHECK_END;
+        MemoryExceptionCheck();
+        MOV(32, gpr.R(a), R(RSCRATCH2));
     }

     gpr.UnlockAll();
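
The store-with-update paths in this file now follow one shape: compute the updated effective address into RSCRATCH2 up front (with LEA when both inputs are in registers), keep RSCRATCH2 alive across the store, and commit it to the guest register only after MemoryExceptionCheck passes. Side by side with the old add-then-revert approach:

    // old: mutate, then undo on fault        // new: commit only on success
    ADD(32, gpr.R(a), gpr.R(b));              LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
    /* ...store... */                         /* ...store via RSCRATCH2... */
    MEMCHECK_START(true)                      MemoryExceptionCheck();
    SUB(32, gpr.R(a), gpr.R(b));              MOV(32, gpr.R(a), R(RSCRATCH2));
    MEMCHECK_END;

Besides dropping the revert, this leaves the guest register untouched at the instant a DSI is raised, which matches the architecturally visible state.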

View File

@ -46,9 +46,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
} }
else else
{ {
addr = R(RSCRATCH); addr = R(RSCRATCH2);
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else else
{ {
MOV(32, addr, gpr.R(b)); MOV(32, addr, gpr.R(b));
@ -65,14 +65,19 @@ void Jit64::lfXXX(UGeckoInstruction inst)
offset = (s16)inst.SIMM_16; offset = (s16)inst.SIMM_16;
} }
fpr.Lock(d);
if (js.memcheck && single)
{
fpr.StoreFromRegister(d);
js.revertFprLoad = d;
}
fpr.BindToRegister(d, !single);
BitSet32 registersInUse = CallerSavedRegistersInUse(); BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update && js.memcheck) if (update && js.memcheck)
registersInUse[RSCRATCH2] = true; registersInUse[RSCRATCH2] = true;
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false); SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);
fpr.Lock(d);
fpr.BindToRegister(d, js.memcheck || !single);
MEMCHECK_START(false) MemoryExceptionCheck();
if (single) if (single)
{ {
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true); ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
@ -84,7 +89,6 @@ void Jit64::lfXXX(UGeckoInstruction inst)
} }
if (update && js.memcheck) if (update && js.memcheck)
MOV(32, gpr.R(a), addr); MOV(32, gpr.R(a), addr);
MEMCHECK_END
fpr.UnlockAll(); fpr.UnlockAll();
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -141,9 +145,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)
else else
{ {
gpr.KillImmediate(a, true, true); gpr.KillImmediate(a, true, true);
MEMCHECK_START(false) MemoryExceptionCheck();
ADD(32, gpr.R(a), Imm32((u32)imm)); ADD(32, gpr.R(a), Imm32((u32)imm));
MEMCHECK_END
} }
} }
fpr.UnlockAll(); fpr.UnlockAll();
@ -152,15 +155,9 @@ void Jit64::stfXXX(UGeckoInstruction inst)
} }
s32 offset = 0; s32 offset = 0;
if (indexed)
{
if (update) if (update)
{
gpr.BindToRegister(a, true, true); gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b)); if (indexed)
MOV(32, R(RSCRATCH2), gpr.R(a));
}
else
{ {
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
@ -171,29 +168,30 @@ void Jit64::stfXXX(UGeckoInstruction inst)
ADD(32, R(RSCRATCH2), gpr.R(a)); ADD(32, R(RSCRATCH2), gpr.R(a));
} }
} }
}
else else
{ {
if (update) if (update)
{ {
gpr.BindToRegister(a, true, true); LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm));
ADD(32, gpr.R(a), Imm32(imm));
} }
else else
{ {
offset = imm; offset = imm;
}
MOV(32, R(RSCRATCH2), gpr.R(a)); MOV(32, R(RSCRATCH2), gpr.R(a));
} }
}
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, CallerSavedRegistersInUse()); BitSet32 registersInUse = CallerSavedRegistersInUse();
// We need to save the (usually scratch) address register for the update.
if (update)
registersInUse[RSCRATCH2] = true;
if (js.memcheck && update) SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
if (update)
{ {
// revert the address change if an exception occurred MemoryExceptionCheck();
MEMCHECK_START(true) MOV(32, gpr.R(a), R(RSCRATCH2));
SUB(32, gpr.R(a), indexed ? gpr.R(b) : Imm32(imm));
MEMCHECK_END
} }
fpr.UnlockAll(); fpr.UnlockAll();
@ -78,12 +78,11 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
if (update && js.memcheck) if (update && js.memcheck)
{ {
MEMCHECK_START(false) MemoryExceptionCheck();
if (indexed) if (indexed)
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
else else
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -137,7 +136,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8]))); CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
MEMCHECK_START(false) MemoryExceptionCheck();
CVTPS2PD(fpr.RX(s), R(XMM0)); CVTPS2PD(fpr.RX(s), R(XMM0));
if (update && js.memcheck) if (update && js.memcheck)
{ {
@ -146,7 +145,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
else else
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
} }
MEMCHECK_END
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -112,6 +112,41 @@ void Jit64::ClearCRFieldBit(int field, int bit)
// We don't need to set bit 32; the cases where that's needed only come up when setting bits, not clearing. // We don't need to set bit 32; the cases where that's needed only come up when setting bits, not clearing.
} }
void Jit64::SetCRFieldBit(int field, int bit)
{
MOV(64, R(RSCRATCH), PPCSTATE(cr_val[field]));
if (bit != CR_GT_BIT)
{
TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch dont_clear_gt = J_CC(CC_NZ);
BTS(64, R(RSCRATCH), Imm8(63));
SetJumpTarget(dont_clear_gt);
}
switch (bit)
{
case CR_SO_BIT:
BTS(64, R(RSCRATCH), Imm8(61));
break;
case CR_EQ_BIT:
SHR(64, R(RSCRATCH), Imm8(32));
SHL(64, R(RSCRATCH), Imm8(32));
break;
case CR_GT_BIT:
BTR(64, R(RSCRATCH), Imm8(63));
break;
case CR_LT_BIT:
BTS(64, R(RSCRATCH), Imm8(62));
break;
}
BTS(64, R(RSCRATCH), Imm8(32));
MOV(64, PPCSTATE(cr_val[field]), R(RSCRATCH));
}
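SetCRFieldBit only makes sense against the packed per-field CR representation the emitted code implies: SO lives in bit 61, LT in bit 62, GT holds iff the 64-bit value is positive as a signed integer, and EQ holds iff the low 32 bits are zero; bit 32 is kept set so a stored field is never all-zero, and bit 63 is set whenever GT must read false (which is why the zero-value fixup above sets it). A small self-contained decoder, reconstructed from that encoding rather than copied from the source:

#include <cstdint>

// Returns the field's 4-bit LT|GT|EQ|SO nibble, as mfcr would observe it.
static uint32_t UnpackCRField(uint64_t cr_val)
{
    uint32_t so = (uint32_t)((cr_val >> 61) & 1);
    uint32_t eq = (uint32_t)cr_val == 0;
    uint32_t gt = (int64_t)cr_val > 0;
    uint32_t lt = (uint32_t)((cr_val >> 62) & 1);
    return (lt << 3) | (gt << 2) | (eq << 1) | so;
}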
FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{ {
switch (bit) switch (bit)
@ -371,39 +406,12 @@ void Jit64::mfcr(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
// USES_CR
int d = inst.RD; int d = inst.RD;
gpr.FlushLockX(RSCRATCH_EXTRA);
CALL((void *)asm_routines.mfcr);
gpr.Lock(d);
gpr.BindToRegister(d, false, true); gpr.BindToRegister(d, false, true);
XOR(32, gpr.R(d), gpr.R(d)); MOV(32, gpr.R(d), R(RSCRATCH));
X64Reg cr_val = RSCRATCH2;
// we only need to zero the high bits of RSCRATCH once
XOR(32, R(RSCRATCH), R(RSCRATCH));
for (int i = 0; i < 8; i++)
{
static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9};
if (i != 0)
SHL(32, gpr.R(d), Imm8(4));
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
// EQ: Bits 31-0 == 0; set flag bit 1
TEST(32, R(cr_val), R(cr_val));
SETcc(CC_Z, R(RSCRATCH));
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0));
// GT: Value > 0; set flag bit 2
TEST(64, R(cr_val), R(cr_val));
SETcc(CC_G, R(RSCRATCH));
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0));
// SO: Bit 61 set; set flag bit 0
// LT: Bit 62 set; set flag bit 3
SHR(64, R(cr_val), Imm8(61));
MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable));
OR(32, gpr.R(d), R(RSCRATCH));
}
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -506,6 +514,13 @@ void Jit64::crXXX(UGeckoInstruction inst)
return; return;
} }
// Special case: crset
if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289)
{
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3));
return;
}
// TODO(delroth): Potential optimizations could be applied here. For // TODO(delroth): Potential optimizations could be applied here. For
// instance, if the two CR bits being loaded are the same, two loads are // instance, if the two CR bits being loaded are the same, two loads are
// not required. // not required.
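This special case hinges on SUBOP10 289 being creqv: creqv b,b,b computes b = ~(b ^ b) = 1, so it is the canonical crset idiom and lowers to an unconditional SetCRFieldBit. The 3 - (inst.CRBD & 3) flip converts PPC's MSB-first bit numbering within a field into the LT/GT/EQ/SO bit indices used above. A two-line sanity check of the identity:

#include <cassert>

int main()
{
    for (int b = 0; b <= 1; b++)
        assert(((~(b ^ b)) & 1) == 1);  // eqv(x, x) is always 1
}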
@ -249,7 +249,7 @@ void JitIL::Init()
jo.accurateSinglePrecision = false; jo.accurateSinglePrecision = false;
js.memcheck = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU; js.memcheck = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU;
trampolines.Init(); trampolines.Init(js.memcheck ? TRAMPOLINE_CODE_SIZE_MMU : TRAMPOLINE_CODE_SIZE);
AllocCodeSpace(CODE_SIZE); AllocCodeSpace(CODE_SIZE);
blocks.Init(); blocks.Init();
asm_routines.Init(nullptr); asm_routines.Init(nullptr);
@ -151,6 +151,44 @@ void CommonAsmRoutines::GenFres()
RET(); RET();
} }
void CommonAsmRoutines::GenMfcr()
{
// Input: none
// Output: RSCRATCH
// This function clobbers all three scratch registers (RSCRATCH, RSCRATCH2, RSCRATCH_EXTRA).
X64Reg dst = RSCRATCH;
X64Reg tmp = RSCRATCH2;
X64Reg cr_val = RSCRATCH_EXTRA;
XOR(32, R(dst), R(dst));
// we only need to zero the high bits of tmp once
XOR(32, R(tmp), R(tmp));
for (int i = 0; i < 8; i++)
{
static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 };
if (i != 0)
SHL(32, R(dst), Imm8(4));
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
// EQ: Bits 31-0 == 0; set flag bit 1
TEST(32, R(cr_val), R(cr_val));
// FIXME: is there a better way to do this without the partial register merging?
SETcc(CC_Z, R(tmp));
LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0));
// GT: Value > 0; set flag bit 2
TEST(64, R(cr_val), R(cr_val));
SETcc(CC_G, R(tmp));
LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0));
// SO: Bit 61 set; set flag bit 0
// LT: Bit 62 set; set flag bit 3
SHR(64, R(cr_val), Imm8(61));
OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable));
}
RET();
}
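After the SHR by 61, cr_val holds the field's top three bits (bit 63, then LT, then SO) as a 0..7 table index; the table contributes the SO (0x1) and LT (0x8) flag bits and deliberately ignores bit 63, since GT was already computed by the signed TEST above. A quick stand-alone check that the table matches that reading:

#include <cassert>
#include <cstdint>

int main()
{
    static const uint32_t flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 };
    for (uint32_t idx = 0; idx < 8; idx++)
    {
        uint32_t so = idx & 1;         // bit 61 of the original cr_val
        uint32_t lt = (idx >> 1) & 1;  // bit 62 of the original cr_val
        assert(flagTable[idx] == ((lt << 3) | so));
    }
}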
// Safe + Fast Quantizers, originally from JITIL by magumagu // Safe + Fast Quantizers, originally from JITIL by magumagu
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
@ -25,6 +25,7 @@ public:
const u8 *frsqrte; const u8 *frsqrte;
const u8 *fres; const u8 *fres;
const u8 *mfcr;
// In: array index: GQR to use. // In: array index: GQR to use.
// In: ECX: Address to read from. // In: ECX: Address to read from.
@ -58,4 +59,5 @@ public:
void GenFifoWrite(int size); void GenFifoWrite(int size);
void GenFrsqrte(); void GenFrsqrte();
void GenFres(); void GenFres();
void GenMfcr();
}; };
@ -73,9 +73,16 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
BitSet32 registersInUse = it->second; BitSet32 registersInUse = it->second;
u8* exceptionHandler = nullptr;
if (jit->js.memcheck)
{
auto it2 = exceptionHandlerAtLoc.find(codePtr);
if (it2 != exceptionHandlerAtLoc.end())
exceptionHandler = it2->second;
}
if (!info.isMemoryWrite) if (!info.isMemoryWrite)
{ {
XEmitter emitter(codePtr);
int bswapNopCount; int bswapNopCount;
if (info.byteSwap || info.operandSize == 1) if (info.byteSwap || info.operandSize == 1)
bswapNopCount = 0; bswapNopCount = 0;
@ -101,9 +108,11 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
totalSize += 3; totalSize += 3;
} }
const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse); XEmitter emitter(codePtr);
emitter.CALL((void *)trampoline);
int padding = totalSize - BACKPATCH_SIZE; int padding = totalSize - BACKPATCH_SIZE;
u8* returnPtr = codePtr + 5 + padding;
const u8* trampoline = trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
emitter.JMP(trampoline, true);
if (padding > 0) if (padding > 0)
{ {
emitter.NOP(padding); emitter.NOP(padding);
@ -113,14 +122,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
else else
{ {
// TODO: special case FIFO writes. Also, support 32-bit mode. // TODO: special case FIFO writes. Also, support 32-bit mode.
auto it2 = pcAtLoc.find(codePtr); auto it3 = pcAtLoc.find(codePtr);
if (it2 == pcAtLoc.end()) if (it3 == pcAtLoc.end())
{ {
PanicAlert("BackPatch: no pc entry for address %p", codePtr); PanicAlert("BackPatch: no pc entry for address %p", codePtr);
return false; return false;
} }
u32 pc = it2->second; u32 pc = it3->second;
u8 *start; u8 *start;
if (info.byteSwap || info.hasImmediate) if (info.byteSwap || info.hasImmediate)
@ -154,9 +163,10 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
start = codePtr - bswapSize; start = codePtr - bswapSize;
} }
XEmitter emitter(start); XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, pc); ptrdiff_t padding = (codePtr - (start + 5)) + info.instructionSize;
emitter.CALL((void *)trampoline); u8* returnPtr = start + 5 + padding;
ptrdiff_t padding = (codePtr - emitter.GetCodePtr()) + info.instructionSize; const u8* trampoline = trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
emitter.JMP(trampoline, true);
if (padding > 0) if (padding > 0)
{ {
emitter.NOP(padding); emitter.NOP(padding);
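In both the read and write paths, the faulting fastmem access is overwritten with a 5-byte rel32 JMP (BACKPATCH_SIZE) plus NOP padding, and the trampoline jumps back to returnPtr, the first byte after the original access; the write path computes the same quantity from start and info.instructionSize. A small check of the read-path arithmetic, with an illustrative totalSize:

#include <cassert>

int main()
{
    const int BACKPATCH_SIZE = 5;  // size of a rel32 JMP
    int totalSize = 8;             // illustrative: MOV plus BSWAP bytes at the patched site
    int padding = totalSize - BACKPATCH_SIZE;
    // returnPtr = codePtr + 5 + padding above, i.e. codePtr + totalSize:
    assert(5 + padding == totalSize);
}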
@ -73,6 +73,16 @@ protected:
int downcountAmount; int downcountAmount;
u32 numLoadStoreInst; u32 numLoadStoreInst;
u32 numFloatingPointInst; u32 numFloatingPointInst;
// If this is set, we need to generate an exception handler for the fastmem load.
u8* fastmemLoadStore;
// If this is set, a load or store already prepared a jump to the exception handler for us,
// so just fix up that branch instead of testing for a DSI again.
bool fixupExceptionHandler;
Gen::FixupBranch exceptionHandler;
// If these are set (to a register number), we've stored that register's old value before a
// possibly-faulting load overwrites it, which lets us revert it on the exception path.
int revertGprLoad;
int revertFprLoad;
bool firstFPInstructionFound; bool firstFPInstructionFound;
bool isLastInstruction; bool isLastInstruction;
@ -13,6 +13,16 @@
using namespace Gen; using namespace Gen;
void EmuCodeBlock::MemoryExceptionCheck()
{
if (jit->js.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
jit->js.exceptionHandler = J_CC(Gen::CC_NZ, true);
jit->js.fixupExceptionHandler = true;
}
}
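MemoryExceptionCheck replaces the old MEMCHECK_START/MEMCHECK_END macro pair: it emits the DSI test at most once per instruction (latched by js.fixupExceptionHandler) and not at all when a fastmem access already routes its fault through a backpatched trampoline. A stand-alone model of just that latching behaviour, with a print standing in for the emitted TEST/J_CC:

#include <cstdio>

struct JitStateModel
{
    bool memcheck = true;
    bool fastmemLoadStore = false;  // stands in for the u8* in the real jit state
    bool fixupExceptionHandler = false;
};

static void MemoryExceptionCheckModel(JitStateModel& js)
{
    if (js.memcheck && !js.fastmemLoadStore && !js.fixupExceptionHandler)
    {
        printf("TEST [Exceptions], EXCEPTION_DSI; JNZ exceptionHandler\n");
        js.fixupExceptionHandler = true;  // later checks in this instruction emit nothing
    }
}

int main()
{
    JitStateModel js;
    MemoryExceptionCheckModel(js);  // emits the check once
    MemoryExceptionCheckModel(js);  // no-op: the handler is already pending
}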
void EmuCodeBlock::LoadAndSwap(int size, Gen::X64Reg dst, const Gen::OpArg& src) void EmuCodeBlock::LoadAndSwap(int size, Gen::X64Reg dst, const Gen::OpArg& src)
{ {
if (cpu_info.bMOVBE) if (cpu_info.bMOVBE)
@ -291,11 +301,8 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, B
} }
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags) void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
{
if (!jit->js.memcheck)
{ {
registersInUse[reg_value] = false; registersInUse[reg_value] = false;
}
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
!opAddress.IsImm() && !opAddress.IsImm() &&
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM))
@ -307,6 +314,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
u8 *mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); u8 *mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
registersInUseAtLoc[mov] = registersInUse; registersInUseAtLoc[mov] = registersInUse;
jit->js.fastmemLoadStore = mov;
} }
else else
{ {
@ -349,7 +357,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
} }
ABI_PopRegistersAndAdjustStack(registersInUse, 0); ABI_PopRegistersAndAdjustStack(registersInUse, 0);
MEMCHECK_START(false) MemoryExceptionCheck();
if (signExtend && accessSize < 32) if (signExtend && accessSize < 32)
{ {
// Need to sign extend values coming from the Read_U* functions. // Need to sign extend values coming from the Read_U* functions.
@ -359,7 +367,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
{ {
MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
} }
MEMCHECK_END
} }
} }
else else
@ -399,7 +406,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
} }
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
MEMCHECK_START(false) MemoryExceptionCheck();
if (signExtend && accessSize < 32) if (signExtend && accessSize < 32)
{ {
// Need to sign extend values coming from the Read_U* functions. // Need to sign extend values coming from the Read_U* functions.
@ -409,7 +416,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
{ {
MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
} }
MEMCHECK_END
if (farcode.Enabled()) if (farcode.Enabled())
{ {
@ -547,8 +553,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
reg_value = FixImmediate(accessSize, reg_value); reg_value = FixImmediate(accessSize, reg_value);
// TODO: support byte-swapped non-immediate fastmem stores // TODO: support byte-swapped non-immediate fastmem stores
if (!jit->js.memcheck && if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
!(flags & SAFE_LOADSTORE_NO_FASTMEM) && !(flags & SAFE_LOADSTORE_NO_FASTMEM) &&
(reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)) (reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP))
#ifdef ENABLE_MEM_CHECK #ifdef ENABLE_MEM_CHECK
@ -566,6 +571,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
registersInUseAtLoc[mov] = registersInUse; registersInUseAtLoc[mov] = registersInUse;
pcAtLoc[mov] = jit->js.compilerPC; pcAtLoc[mov] = jit->js.compilerPC;
jit->js.fastmemLoadStore = mov;
return; return;
} }
@ -12,18 +12,6 @@
namespace MMIO { class Mapping; } namespace MMIO { class Mapping; }
// If inv is true, invert the check (i.e. skip over the associated code if an exception hits,
// instead of skipping over the code if an exception isn't hit).
#define MEMCHECK_START(inv) \
Gen::FixupBranch memException; \
if (jit->js.memcheck) \
{ TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); \
memException = J_CC((inv) ? Gen::CC_Z : Gen::CC_NZ, true); }
#define MEMCHECK_END \
if (jit->js.memcheck) \
SetJumpTarget(memException);
// We offset by 0x80 because the range of one byte memory offsets is // We offset by 0x80 because the range of one byte memory offsets is
// -0x80..0x7f. // -0x80..0x7f.
#define PPCSTATE(x) MDisp(RPPCSTATE, \ #define PPCSTATE(x) MDisp(RPPCSTATE, \
@ -54,6 +42,10 @@ static const int CODE_SIZE = 1024 * 1024 * 32;
static const int FARCODE_SIZE = 1024 * 1024 * 8; static const int FARCODE_SIZE = 1024 * 1024 * 8;
static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48; static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
// The trampoline code cache is likewise larger in MMU mode, because fastmem results in far more backpatches there.
static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
// Like XCodeBlock but has some utilities for memory access. // Like XCodeBlock but has some utilities for memory access.
class EmuCodeBlock : public Gen::X64CodeBlock class EmuCodeBlock : public Gen::X64CodeBlock
{ {
@ -61,6 +53,8 @@ public:
FarCodeCache farcode; FarCodeCache farcode;
u8* nearcode; // Backed up when we switch to far code. u8* nearcode; // Backed up when we switch to far code.
void MemoryExceptionCheck();
// Simple functions to switch between near and far code emitting // Simple functions to switch between near and far code emitting
void SwitchToFarCode() void SwitchToFarCode()
{ {
@ -141,4 +135,5 @@ public:
protected: protected:
std::unordered_map<u8 *, BitSet32> registersInUseAtLoc; std::unordered_map<u8 *, BitSet32> registersInUseAtLoc;
std::unordered_map<u8 *, u32> pcAtLoc; std::unordered_map<u8 *, u32> pcAtLoc;
std::unordered_map<u8 *, u8 *> exceptionHandlerAtLoc;
}; };
@ -19,37 +19,22 @@
using namespace Gen; using namespace Gen;
void TrampolineCache::Init() void TrampolineCache::Init(int size)
{ {
AllocCodeSpace(8 * 1024 * 1024); AllocCodeSpace(size);
} }
void TrampolineCache::ClearCodeSpace() void TrampolineCache::ClearCodeSpace()
{ {
X64CodeBlock::ClearCodeSpace(); X64CodeBlock::ClearCodeSpace();
cachedTrampolines.clear();
} }
void TrampolineCache::Shutdown() void TrampolineCache::Shutdown()
{ {
FreeCodeSpace(); FreeCodeSpace();
cachedTrampolines.clear();
} }
const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse) const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr)
{
TrampolineCacheKey key = { registersInUse, 0, info };
auto it = cachedTrampolines.find(key);
if (it != cachedTrampolines.end())
return it->second;
const u8* trampoline = GenerateReadTrampoline(info, registersInUse);
cachedTrampolines[key] = trampoline;
return trampoline;
}
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse)
{ {
if (GetSpaceLeft() < 1024) if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full"); PanicAlert("Trampoline cache full");
@ -57,57 +42,63 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B
const u8* trampoline = GetCodePtr(); const u8* trampoline = GetCodePtr();
X64Reg addrReg = (X64Reg)info.scaledReg; X64Reg addrReg = (X64Reg)info.scaledReg;
X64Reg dataReg = (X64Reg)info.regOperandReg; X64Reg dataReg = (X64Reg)info.regOperandReg;
registersInUse[addrReg] = true; int stack_offset = 0;
registersInUse[dataReg] = false; bool push_param1 = registersInUse[ABI_PARAM1];
// It's a read. Easy. if (push_param1)
// RSP alignment here is 8 due to the call. {
ABI_PushRegistersAndAdjustStack(registersInUse, 8); PUSH(ABI_PARAM1);
stack_offset = 8;
registersInUse[ABI_PARAM1] = 0;
}
int dataRegSize = info.operandSize == 8 ? 64 : 32; int dataRegSize = info.operandSize == 8 ? 64 : 32;
MOVTwo(dataRegSize, ABI_PARAM1, addrReg, ABI_PARAM2, dataReg); if (addrReg != ABI_PARAM1 && info.displacement)
LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
if (info.displacement) else if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(addrReg));
else if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset);
switch (info.operandSize) switch (info.operandSize)
{ {
case 8: case 8:
CALL((void *)&Memory::Read_U64_Val); CALL((void *)&Memory::Read_U64);
break; break;
case 4: case 4:
CALL((void *)&Memory::Read_U32_Val); CALL((void *)&Memory::Read_U32);
break; break;
case 2: case 2:
CALL(info.signExtend ? (void *)&Memory::Read_S16_Val : (void *)&Memory::Read_U16_Val); CALL((void *)&Memory::Read_U16);
break; break;
case 1: case 1:
CALL(info.signExtend ? (void *)&Memory::Read_S8_Val : (void *)&Memory::Read_U8_Val); CALL((void *)&Memory::Read_U8);
break; break;
} }
if (dataReg != ABI_RETURN) ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
MOV(dataRegSize, R(dataReg), R(ABI_RETURN));
ABI_PopRegistersAndAdjustStack(registersInUse, 8); if (push_param1)
RET(); POP(ABI_PARAM1);
return trampoline;
}
const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc) if (exceptionHandler)
{ {
TrampolineCacheKey key = { registersInUse, pc, info }; TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
auto it = cachedTrampolines.find(key); if (info.signExtend)
if (it != cachedTrampolines.end()) MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
return it->second; else if (dataReg != ABI_RETURN || info.operandSize < 4)
MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
const u8* trampoline = GenerateWriteTrampoline(info, registersInUse, pc); JMP(returnPtr, true);
cachedTrampolines[key] = trampoline;
return trampoline; return trampoline;
} }
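Putting the emitter calls above together, a generated read trampoline now looks roughly like the following; this is a reconstruction for illustration only (register names, sizes, and ordering are inferred from the generator, not taken from a real dump):

  PUSH  rcx                             ; only if ABI_PARAM1 was live at the site
  LEA   ecx, [addrReg + displacement]   ; effective address into ABI_PARAM1
  ...   push remaining live registers ...
  CALL  Memory::Read_UXX
  ...   pop live registers ...
  POP   rcx                             ; if pushed above
  TEST  dword [rbp + Exceptions], EXCEPTION_DSI   ; only emitted when an
  JNZ   exceptionHandler                          ; exception handler exists
  MOVSX/MOVZX dataReg, eax              ; extend the result as the load requires
  JMP   returnPtr                       ; resume the JIT block past the old access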
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc) const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc)
{ {
if (GetSpaceLeft() < 1024) if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full"); PanicAlert("Trampoline cache full");
@ -117,20 +108,23 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info,
X64Reg dataReg = (X64Reg)info.regOperandReg; X64Reg dataReg = (X64Reg)info.regOperandReg;
X64Reg addrReg = (X64Reg)info.scaledReg; X64Reg addrReg = (X64Reg)info.scaledReg;
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
// hardware access - we can take shortcuts.
// Don't treat FIFO writes specially for now because they require a burst // Don't treat FIFO writes specially for now because they require a burst
// check anyway. // check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc)); MOV(32, PPCSTATE(pc), Imm32(pc));
ABI_PushRegistersAndAdjustStack(registersInUse, 8); ABI_PushRegistersAndAdjustStack(registersInUse, 0);
if (info.hasImmediate) if (info.hasImmediate)
{ {
if (addrReg != ABI_PARAM2) if (addrReg != ABI_PARAM2 && info.displacement)
MOV(64, R(ABI_PARAM2), R(addrReg)); LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement));
else if (addrReg != ABI_PARAM2)
MOV(32, R(ABI_PARAM2), R(addrReg));
else if (info.displacement)
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
// we have to swap back the immediate to pass it to the write functions // we have to swap back the immediate to pass it to the write functions
switch (info.operandSize) switch (info.operandSize)
{ {
@ -150,11 +144,8 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info,
} }
else else
{ {
MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg); int dataRegSize = info.operandSize == 8 ? 64 : 32;
} MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg);
if (info.displacement)
{
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
} }
switch (info.operandSize) switch (info.operandSize)
@ -173,31 +164,13 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info,
break; break;
} }
ABI_PopRegistersAndAdjustStack(registersInUse, 8); ABI_PopRegistersAndAdjustStack(registersInUse, 0);
RET(); if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
JMP(returnPtr, true);
return trampoline; return trampoline;
} }
size_t TrampolineCacheKeyHasher::operator()(const TrampolineCacheKey& k) const
{
size_t res = std::hash<int>()(k.registersInUse.m_val);
res ^= std::hash<int>()(k.info.operandSize) >> 1;
res ^= std::hash<int>()(k.info.regOperandReg) >> 2;
res ^= std::hash<int>()(k.info.scaledReg) >> 3;
res ^= std::hash<u64>()(k.info.immediate) >> 4;
res ^= std::hash<int>()(k.pc) >> 5;
res ^= std::hash<int>()(k.info.displacement) << 1;
res ^= std::hash<bool>()(k.info.signExtend) << 2;
res ^= std::hash<bool>()(k.info.hasImmediate) << 3;
res ^= std::hash<bool>()(k.info.isMemoryWrite) << 4;
return res;
}
bool TrampolineCacheKey::operator==(const TrampolineCacheKey &other) const
{
return pc == other.pc &&
registersInUse == other.registersInUse &&
info == other.info;
}
@ -14,33 +14,13 @@
// We need at least this many bytes for backpatching. // We need at least this many bytes for backpatching.
const int BACKPATCH_SIZE = 5; const int BACKPATCH_SIZE = 5;
struct TrampolineCacheKey
{
BitSet32 registersInUse;
u32 pc;
InstructionInfo info;
bool operator==(const TrampolineCacheKey &other) const;
};
struct TrampolineCacheKeyHasher
{
size_t operator()(const TrampolineCacheKey& k) const;
};
class TrampolineCache : public Gen::X64CodeBlock class TrampolineCache : public Gen::X64CodeBlock
{ {
public: public:
void Init(); void Init(int size);
void Shutdown(); void Shutdown();
const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse); const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr);
const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc); const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc);
void ClearCodeSpace(); void ClearCodeSpace();
private:
const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse);
const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc);
std::unordered_map<TrampolineCacheKey, const u8*, TrampolineCacheKeyHasher> cachedTrampolines;
}; };
@ -211,7 +211,7 @@ namespace JitInterface
{ {
if (bMMU && !bFakeVMEM && (_Address & Memory::ADDR_MASK_MEM1)) if (bMMU && !bFakeVMEM && (_Address & Memory::ADDR_MASK_MEM1))
{ {
_Address = Memory::TranslateAddress(_Address, Memory::FLAG_OPCODE); _Address = Memory::TranslateAddress<Memory::FLAG_OPCODE>(_Address);
if (_Address == 0) if (_Address == 0)
{ {
return 0; return 0;
@ -649,7 +649,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
bool virtualAddr = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && (address & JIT_ICACHE_VMEM_BIT); bool virtualAddr = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && (address & JIT_ICACHE_VMEM_BIT);
if (virtualAddr) if (virtualAddr)
{ {
if (!Memory::TranslateAddress(address, Memory::FLAG_NO_EXCEPTION)) if (!Memory::TranslateAddress<Memory::FLAG_NO_EXCEPTION>(address))
{ {
// Memory exception occurred during instruction fetch // Memory exception occurred during instruction fetch
block->m_memory_exception = true; block->m_memory_exception = true;
@ -670,6 +670,15 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
if (inst.hex != 0) if (inst.hex != 0)
{ {
// Slight hack: the JIT block cache currently assumes all blocks end at the same place,
// but broken blocks due to page faults break this assumption. Avoid this by just ending
// all virtual memory instruction blocks at page boundaries.
// FIXME: improve the JIT block cache so we don't need to do this.
if (virtualAddr && i > 0 && (address & 0xfff) == 0)
{
break;
}
num_inst++; num_inst++;
memset(&code[i], 0, sizeof(CodeOp)); memset(&code[i], 0, sizeof(CodeOp));
GekkoOPInfo *opinfo = GetOpInfo(inst); GekkoOPInfo *opinfo = GetOpInfo(inst);
@ -125,12 +125,12 @@ void Init(int cpu_core)
{ {
for (int set = 0; set < 64; set++) for (int set = 0; set < 64; set++)
{ {
ppcState.tlb[tlb][set].recent = 0;
for (int way = 0; way < 2; way++) for (int way = 0; way < 2; way++)
{ {
ppcState.tlb[tlb][set][way].flags = TLB_FLAG_INVALID; ppcState.tlb[tlb][set].paddr[way] = 0;
ppcState.tlb[tlb][set][way].paddr = 0; ppcState.tlb[tlb][set].pte[way] = 0;
ppcState.tlb[tlb][set][way].pte = 0; ppcState.tlb[tlb][set].tag[way] = TLB_TAG_INVALID;
ppcState.tlb[tlb][set][way].tag = 0;
} }
} }

@ -29,22 +29,21 @@ enum CoreMode
// TLB cache // TLB cache
#define TLB_SIZE 128 #define TLB_SIZE 128
#define TLB_WAYS 2
#define NUM_TLBS 2 #define NUM_TLBS 2
#define TLB_WAYS 2
#define HW_PAGE_INDEX_SHIFT 12 #define HW_PAGE_INDEX_SHIFT 12
#define HW_PAGE_INDEX_MASK 0x3f #define HW_PAGE_INDEX_MASK 0x3f
#define HW_PAGE_TAG_SHIFT 18 #define HW_PAGE_TAG_SHIFT 18
#define TLB_FLAG_MOST_RECENT 0x01 #define TLB_TAG_INVALID 0xffffffff
#define TLB_FLAG_INVALID 0x02
struct tlb_entry struct tlb_entry
{ {
u32 tag; u32 tag[TLB_WAYS];
u32 paddr; u32 paddr[TLB_WAYS];
u32 pte; u32 pte[TLB_WAYS];
u8 flags; u8 recent;
}; };
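The new layout turns each TLB set into a structure of arrays: per-way tag/paddr/pte plus one recent byte for LRU way selection, and TLB_TAG_INVALID (0xffffffff) serves as a sentinel that can never equal a real tag (tags are virtual addresses shifted right by 18, so at most 14 bits), replacing the old per-way flags field. A hedged sketch of a lookup against this layout; only the data layout comes from the diff, the lookup logic itself is a reconstruction:

#include <cstdint>

#define TLB_WAYS 2
#define HW_PAGE_INDEX_SHIFT 12
#define HW_PAGE_INDEX_MASK 0x3f
#define HW_PAGE_TAG_SHIFT 18
#define TLB_TAG_INVALID 0xffffffff

struct tlb_entry
{
    uint32_t tag[TLB_WAYS];
    uint32_t paddr[TLB_WAYS];
    uint32_t pte[TLB_WAYS];
    uint8_t recent;  // most recently used way, consulted when picking a victim
};

// Returns true and fills *paddr on a hit; on a miss the caller walks the page tables.
static bool LookupTLB(tlb_entry* tlb_array, uint32_t vpa, uint32_t* paddr)
{
    tlb_entry& set = tlb_array[(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
    const uint32_t tag = vpa >> HW_PAGE_TAG_SHIFT;
    for (uint32_t way = 0; way < TLB_WAYS; way++)
    {
        if (set.tag[way] == tag)  // TLB_TAG_INVALID never matches a 14-bit tag
        {
            set.recent = (uint8_t)way;
            *paddr = set.paddr[way] | (vpa & ((1u << HW_PAGE_INDEX_SHIFT) - 1));
            return true;
        }
    }
    return false;
}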
// This contains the entire state of the emulated PowerPC "Gekko" CPU. // This contains the entire state of the emulated PowerPC "Gekko" CPU.
@ -107,7 +106,7 @@ struct GC_ALIGNED64(PowerPCState)
// also for power management, but we don't care about that. // also for power management, but we don't care about that.
u32 spr[1024]; u32 spr[1024];
tlb_entry tlb[NUM_TLBS][TLB_SIZE / TLB_WAYS][TLB_WAYS]; tlb_entry tlb[NUM_TLBS][TLB_SIZE / TLB_WAYS];
u32 pagetable_base; u32 pagetable_base;
u32 pagetable_hashmask; u32 pagetable_hashmask;
@ -64,7 +64,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread; static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system // Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 37; static const u32 STATE_VERSION = 38;
enum enum
{ {