diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp index 44818ceb67..5885031411 100644 --- a/Source/Core/Common/x64ABI.cpp +++ b/Source/Core/Common/x64ABI.cpp @@ -181,20 +181,26 @@ void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) // Pass two registers as parameters. void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) { - MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2); + MOVTwo(64, ABI_PARAM1, reg1, 0, ABI_PARAM2, reg2); ABI_CallFunction(func); } -void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2) +void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, s32 offset1, Gen::X64Reg dst2, Gen::X64Reg src2) { if (dst1 == src2 && dst2 == src1) { XCHG(bits, R(src1), R(src2)); + if (offset1) + ADD(bits, R(dst1), Imm32(offset1)); } else if (src2 != dst1) { - if (dst1 != src1) + if (dst1 != src1 && offset1) + LEA(bits, dst1, MDisp(src1, offset1)); + else if (dst1 != src1) MOV(bits, R(dst1), R(src1)); + else if (offset1) + ADD(bits, R(dst1), Imm32(offset1)); if (dst2 != src2) MOV(bits, R(dst2), R(src2)); } @@ -202,8 +208,12 @@ void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg { if (dst2 != src2) MOV(bits, R(dst2), R(src2)); - if (dst1 != src1) + if (dst1 != src1 && offset1) + LEA(bits, dst1, MDisp(src1, offset1)); + else if (dst1 != src1) MOV(bits, R(dst1), R(src1)); + else if (offset1) + ADD(bits, R(dst1), Imm32(offset1)); } } diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 03aeea4b13..142308e799 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -888,7 +888,7 @@ public: void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); // Helper method for the above, or can be used separately. 
- void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2); + void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, s32 offset, Gen::X64Reg dst2, Gen::X64Reg src2); // Saves/restores the registers and adjusts the stack to be aligned as // required by the ABI, where the previous alignment was as specified. diff --git a/Source/Core/Core/Debugger/PPCDebugInterface.cpp b/Source/Core/Core/Debugger/PPCDebugInterface.cpp index ddc2fec12e..1434fdb5bd 100644 --- a/Source/Core/Core/Debugger/PPCDebugInterface.cpp +++ b/Source/Core/Core/Debugger/PPCDebugInterface.cpp @@ -28,7 +28,7 @@ std::string PPCDebugInterface::Disassemble(unsigned int address) if (!Memory::IsRAMAddress(address, true, true)) { if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU || !((address & JIT_ICACHE_VMEM_BIT) && - Memory::TranslateAddress(address, Memory::FLAG_NO_EXCEPTION))) + Memory::TranslateAddress(address))) { return "(No RAM here)"; } diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp index fab79ba1bf..4f91f558ff 100644 --- a/Source/Core/Core/HW/Memmap.cpp +++ b/Source/Core/Core/HW/Memmap.cpp @@ -188,9 +188,9 @@ bool AreMemoryBreakpointsActivated() #endif } -u32 Read_Instruction(const u32 em_address) +u32 Read_Instruction(const u32 address) { - UGeckoInstruction inst = ReadUnchecked_U32(em_address); + UGeckoInstruction inst = ReadUnchecked_U32(address); return inst.hex; } @@ -235,48 +235,48 @@ void Memset(const u32 _Address, const u8 _iValue, const u32 _iLength) } } -void ClearCacheLine(const u32 _Address) +void ClearCacheLine(const u32 address) { // FIXME: does this do the right thing if dcbz is run on hardware memory, e.g. // the FIFO? Do games even do that? Probably not, but we should try to be correct... 
for (u32 i = 0; i < 32; i += 8) - Write_U64(0, _Address + i); + Write_U64(0, address + i); } -void DMA_LCToMemory(const u32 _MemAddr, const u32 _CacheAddr, const u32 _iNumBlocks) +void DMA_LCToMemory(const u32 memAddr, const u32 cacheAddr, const u32 numBlocks) { - const u8* src = m_pL1Cache + (_CacheAddr & 0x3FFFF); - u8* dst = GetPointer(_MemAddr); + const u8* src = m_pL1Cache + (cacheAddr & 0x3FFFF); + u8* dst = GetPointer(memAddr); - if ((dst != nullptr) && (src != nullptr) && (_MemAddr & 3) == 0 && (_CacheAddr & 3) == 0) + if ((dst != nullptr) && (src != nullptr) && (memAddr & 3) == 0 && (cacheAddr & 3) == 0) { - memcpy(dst, src, 32 * _iNumBlocks); + memcpy(dst, src, 32 * numBlocks); } else { - for (u32 i = 0; i < 32 * _iNumBlocks; i++) + for (u32 i = 0; i < 32 * numBlocks; i++) { - u8 Temp = Read_U8(_CacheAddr + i); - Write_U8(Temp, _MemAddr + i); + u8 Temp = Read_U8(cacheAddr + i); + Write_U8(Temp, memAddr + i); } } } -void DMA_MemoryToLC(const u32 _CacheAddr, const u32 _MemAddr, const u32 _iNumBlocks) +void DMA_MemoryToLC(const u32 cacheAddr, const u32 memAddr, const u32 numBlocks) { - const u8* src = GetPointer(_MemAddr); - u8* dst = m_pL1Cache + (_CacheAddr & 0x3FFFF); + const u8* src = GetPointer(memAddr); + u8* dst = m_pL1Cache + (cacheAddr & 0x3FFFF); - if ((dst != nullptr) && (src != nullptr) && (_MemAddr & 3) == 0 && (_CacheAddr & 3) == 0) + if ((dst != nullptr) && (src != nullptr) && (memAddr & 3) == 0 && (cacheAddr & 3) == 0) { - memcpy(dst, src, 32 * _iNumBlocks); + memcpy(dst, src, 32 * numBlocks); } else { - for (u32 i = 0; i < 32 * _iNumBlocks; i++) + for (u32 i = 0; i < 32 * numBlocks; i++) { - u8 Temp = Read_U8(_MemAddr + i); - Write_U8(Temp, _CacheAddr + i); + u8 Temp = Read_U8(memAddr + i); + Write_U8(Temp, cacheAddr + i); } } } @@ -301,16 +301,16 @@ std::string GetString(u32 em_address, size_t size) // GetPointer must always return an address in the bottom 32 bits of address space, so that 64-bit // programs don't have problems directly 
addressing any part of memory. // TODO re-think with respect to other BAT setups... -u8* GetPointer(const u32 _Address) +u8* GetPointer(const u32 address) { - switch (_Address >> 28) + switch (address >> 28) { case 0x0: case 0x8: - if ((_Address & 0xfffffff) < REALRAM_SIZE) - return m_pRAM + (_Address & RAM_MASK); + if ((address & 0xfffffff) < REALRAM_SIZE) + return m_pRAM + (address & RAM_MASK); case 0xc: - switch (_Address >> 24) + switch (address >> 24) { case 0xcc: case 0xcd: @@ -320,8 +320,8 @@ u8* GetPointer(const u32 _Address) break; default: - if ((_Address & 0xfffffff) < REALRAM_SIZE) - return m_pRAM + (_Address & RAM_MASK); + if ((address & 0xfffffff) < REALRAM_SIZE) + return m_pRAM + (address & RAM_MASK); } case 0x1: @@ -329,53 +329,53 @@ u8* GetPointer(const u32 _Address) case 0xd: if (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii) { - if ((_Address & 0xfffffff) < EXRAM_SIZE) - return m_pEXRAM + (_Address & EXRAM_MASK); + if ((address & 0xfffffff) < EXRAM_SIZE) + return m_pEXRAM + (address & EXRAM_MASK); } else break; case 0xe: - if (_Address < (0xE0000000 + L1_CACHE_SIZE)) - return m_pL1Cache + (_Address & L1_CACHE_MASK); + if (address < (0xE0000000 + L1_CACHE_SIZE)) + return m_pL1Cache + (address & L1_CACHE_MASK); else break; default: if (bFakeVMEM) - return m_pFakeVMEM + (_Address & FAKEVMEM_MASK); + return m_pFakeVMEM + (address & FAKEVMEM_MASK); } - ERROR_LOG(MEMMAP, "Unknown Pointer %#8x PC %#8x LR %#8x", _Address, PC, LR); + ERROR_LOG(MEMMAP, "Unknown Pointer %#8x PC %#8x LR %#8x", address, PC, LR); return nullptr; } -bool IsRAMAddress(const u32 addr, bool allow_locked_cache, bool allow_fake_vmem) +bool IsRAMAddress(const u32 address, bool allow_locked_cache, bool allow_fake_vmem) { - switch ((addr >> 24) & 0xFC) + switch ((address >> 24) & 0xFC) { case 0x00: case 0x80: case 0xC0: - if ((addr & 0x1FFFFFFF) < RAM_SIZE) + if ((address & 0x1FFFFFFF) < RAM_SIZE) return true; else return false; case 0x10: case 0x90: case 0xD0: - if 
(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && (addr & 0x0FFFFFFF) < EXRAM_SIZE) + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && (address & 0x0FFFFFFF) < EXRAM_SIZE) return true; else return false; case 0xE0: - if (allow_locked_cache && addr - 0xE0000000 < L1_CACHE_SIZE) + if (allow_locked_cache && address - 0xE0000000 < L1_CACHE_SIZE) return true; else return false; case 0x7C: - if (allow_fake_vmem && bFakeVMEM && addr >= 0x7E000000) + if (allow_fake_vmem && bFakeVMEM && address >= 0x7E000000) return true; else return false; diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index 8a8b5b42e1..f7915367cf 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -74,64 +74,57 @@ void Clear(); bool AreMemoryBreakpointsActivated(); // ONLY for use by GUI -u8 ReadUnchecked_U8(const u32 _Address); -u32 ReadUnchecked_U32(const u32 _Address); +u8 ReadUnchecked_U8(const u32 address); +u32 ReadUnchecked_U32(const u32 address); -void WriteUnchecked_U8(const u8 _Data, const u32 _Address); -void WriteUnchecked_U32(const u32 _Data, const u32 _Address); +void WriteUnchecked_U8(const u8 var, const u32 address); +void WriteUnchecked_U32(const u32 var, const u32 address); -bool IsRAMAddress(const u32 addr, bool allow_locked_cache = false, bool allow_fake_vmem = false); +bool IsRAMAddress(const u32 address, bool allow_locked_cache = false, bool allow_fake_vmem = false); // used by interpreter to read instructions, uses iCache -u32 Read_Opcode(const u32 _Address); +u32 Read_Opcode(const u32 address); // this is used by Debugger a lot. // For now, just reads from memory! 
-u32 Read_Instruction(const u32 _Address); +u32 Read_Instruction(const u32 address); // For use by emulator -u8 Read_U8(const u32 _Address); -u16 Read_U16(const u32 _Address); -u32 Read_U32(const u32 _Address); -u64 Read_U64(const u32 _Address); - -u32 Read_S8_Val(u32 address, u32 val); -u32 Read_U8_Val(u32 address, u32 val); -u32 Read_S16_Val(u32 address, u32 val); -u32 Read_U16_Val(u32 address, u32 val); -u32 Read_U32_Val(u32 address, u32 val); -u64 Read_U64_Val(u32 address, u64 val); +u8 Read_U8(const u32 address); +u16 Read_U16(const u32 address); +u32 Read_U32(const u32 address); +u64 Read_U64(const u32 address); // Useful helper functions, used by ARM JIT -float Read_F32(const u32 _Address); -double Read_F64(const u32 _Address); +float Read_F32(const u32 address); +double Read_F64(const u32 address); // used by JIT. Return zero-extended 32bit values -u32 Read_U8_ZX(const u32 _Address); -u32 Read_U16_ZX(const u32 _Address); +u32 Read_U8_ZX(const u32 address); +u32 Read_U16_ZX(const u32 address); -void Write_U8(const u8 _Data, const u32 _Address); -void Write_U16(const u16 _Data, const u32 _Address); -void Write_U32(const u32 _Data, const u32 _Address); -void Write_U64(const u64 _Data, const u32 _Address); +void Write_U8(const u8 var, const u32 address); +void Write_U16(const u16 var, const u32 address); +void Write_U32(const u32 var, const u32 address); +void Write_U64(const u64 var, const u32 address); -void Write_U16_Swap(const u16 _Data, const u32 _Address); -void Write_U32_Swap(const u32 _Data, const u32 _Address); -void Write_U64_Swap(const u64 _Data, const u32 _Address); +void Write_U16_Swap(const u16 var, const u32 address); +void Write_U32_Swap(const u32 var, const u32 address); +void Write_U64_Swap(const u64 var, const u32 address); // Useful helper functions, used by ARM JIT -void Write_F64(const double _Data, const u32 _Address); +void Write_F64(const double var, const u32 address); std::string GetString(u32 em_address, size_t size = 0); -u8* 
GetPointer(const u32 _Address); -void DMA_LCToMemory(const u32 _iMemAddr, const u32 _iCacheAddr, const u32 _iNumBlocks); -void DMA_MemoryToLC(const u32 _iCacheAddr, const u32 _iMemAddr, const u32 _iNumBlocks); +u8* GetPointer(const u32 address); +void DMA_LCToMemory(const u32 memAddr, const u32 cacheAddr, const u32 numBlocks); +void DMA_MemoryToLC(const u32 cacheAddr, const u32 memAddr, const u32 numBlocks); void CopyFromEmu(void* data, u32 address, size_t size); void CopyToEmu(u32 address, const void* data, size_t size); -void Memset(const u32 _Address, const u8 _Data, const u32 _iLength); -void ClearCacheLine(const u32 _Address); // Zeroes 32 bytes; address should be 32-byte-aligned +void Memset(const u32 address, const u8 var, const u32 length); +void ClearCacheLine(const u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned // TLB functions void SDRUpdated(); @@ -142,8 +135,8 @@ enum XCheckTLBFlag FLAG_WRITE, FLAG_OPCODE, }; -u32 TranslateAddress(u32 _Address, XCheckTLBFlag _Flag); -void InvalidateTLBEntry(u32 _Address); +template u32 TranslateAddress(const u32 address); +void InvalidateTLBEntry(u32 address); extern u32 pagetable_base; extern u32 pagetable_hashmask; } diff --git a/Source/Core/Core/HW/MemmapFunctions.cpp b/Source/Core/Core/HW/MemmapFunctions.cpp index 0f91e4ea87..d52bb6f5ad 100644 --- a/Source/Core/Core/HW/MemmapFunctions.cpp +++ b/Source/Core/Core/HW/MemmapFunctions.cpp @@ -16,6 +16,7 @@ // https://github.com/dolphin-emu/dolphin #include "Common/Atomic.h" +#include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Core/ConfigManager.h" @@ -91,243 +92,187 @@ static u32 EFB_Read(const u32 addr) static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite); -template -__forceinline void ReadFromHardware(U &_var, const u32 em_address, Memory::XCheckTLBFlag flag) +template +__forceinline T ReadFromHardware(const u32 em_address) { - // TODO: Figure out the fastest order of tests for both read and write (they are 
probably different). - if ((em_address & 0xC8000000) == 0xC8000000) + int segment = em_address >> 28; + // Quick check for an address that can't meet any of the following conditions, + // to speed up the MMU path. + if (!BitSet32(0xCFC)[segment]) { - if (em_address < 0xcc000000) - _var = EFB_Read(em_address); - else - _var = (T)mmio_mapping->Read::type>(em_address); + // TODO: Figure out the fastest order of tests for both read and write (they are probably different). + if ((em_address & 0xC8000000) == 0xC8000000) + { + if (em_address < 0xcc000000) + return EFB_Read(em_address); + else + return (T)mmio_mapping->Read::type>(em_address); + } + else if (segment == 0x8 || segment == 0xC || segment == 0x0) + { + return bswap((*(const T*)&m_pRAM[em_address & RAM_MASK])); + } + else if (m_pEXRAM && (segment == 0x9 || segment == 0xD || segment == 0x1)) + { + return bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK])); + } + else if (segment == 0xE && (em_address < (0xE0000000 + L1_CACHE_SIZE))) + { + return bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK])); + } } - else if (((em_address & 0xF0000000) == 0x80000000) || - ((em_address & 0xF0000000) == 0xC0000000) || - ((em_address & 0xF0000000) == 0x00000000)) - { - _var = bswap((*(const T*)&m_pRAM[em_address & RAM_MASK])); - } - else if (m_pEXRAM && (((em_address & 0xF0000000) == 0x90000000) || - ((em_address & 0xF0000000) == 0xD0000000) || - ((em_address & 0xF0000000) == 0x10000000))) - { - _var = bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK])); - } - else if ((em_address >= 0xE0000000) && (em_address < (0xE0000000+L1_CACHE_SIZE))) - { - _var = bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK])); - } - else if ((bFakeVMEM && ((em_address &0xF0000000) == 0x70000000)) || - (bFakeVMEM && ((em_address &0xF0000000) == 0x40000000))) + + if (bFakeVMEM && (segment == 0x7 || segment == 0x4)) { // fake VMEM - _var = bswap((*(const T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK])); + return bswap((*(const 
T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK])); } - else + + // MMU: Do page table translation + u32 tlb_addr = TranslateAddress(em_address); + if (tlb_addr == 0) { - // MMU - // Handle loads that cross page boundaries (ewwww) - if (sizeof(T) > 1 && (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T)) - { - _var = 0; - // This could be unaligned down to the byte level... hopefully this is rare, so doing it this - // way isn't too terrible. - // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. - // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! - u32 tlb_addr = TranslateAddress(em_address, flag); - for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++) - { - // Start of the new page... translate the address again! - if (!(addr & (HW_PAGE_SIZE-1))) - tlb_addr = TranslateAddress(addr, flag); - // Important: we need to generate the DSI on the first store that caused the fault, NOT - // the address of the start of the load. 
- if (tlb_addr == 0) - { - if (flag == FLAG_READ) - { - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU) - PanicAlertT("Invalid Read at 0x%08x, PC = 0x%08x ", em_address, PC); - else - GenerateDSIException(addr, false); - break; - } - } - else - { - if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000) - { - _var <<= 8; - _var |= m_pEXRAM[tlb_addr & EXRAM_MASK]; - } - else - { - _var <<= 8; - _var |= m_pRAM[tlb_addr & RAM_MASK]; - } - } - } - } - else - { - u32 tlb_addr = TranslateAddress(em_address, flag); - if (tlb_addr == 0) - { - if (flag == FLAG_READ) - { - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU) - PanicAlertT("Invalid Read at 0x%08x, PC = 0x%08x ", em_address, PC); - else - GenerateDSIException(em_address, false); - } - } - else - { - if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000) - { - _var = bswap((*(const T*)&m_pEXRAM[tlb_addr & EXRAM_MASK])); - } - else - { - _var = bswap((*(const T*)&m_pRAM[tlb_addr & RAM_MASK])); - } - } - } + if (flag == FLAG_READ) + GenerateDSIException(em_address, false); + return 0; } + + // Handle loads that cross page boundaries (ewwww) + // The alignment check isn't strictly necessary, but since this is a rare slow path, it provides a faster + // (1 instruction on x86) bailout. + if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) && (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T)) + { + // This could be unaligned down to the byte level... hopefully this is rare, so doing it this + // way isn't too terrible. + // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions. + // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned! 
+ u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1); + u32 tlb_addr_next_page = TranslateAddress(em_address_next_page); + if (tlb_addr == 0 || tlb_addr_next_page == 0) + { + if (flag == FLAG_READ) + GenerateDSIException(em_address_next_page, false); + return 0; + } + T var = 0; + for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++) + { + if (addr == em_address_next_page) + tlb_addr = tlb_addr_next_page; + var = (var << 8) | Memory::base[tlb_addr]; + } + return var; + } + + // The easy case! + return bswap(*(const T*)&Memory::base[tlb_addr]); } -template -__forceinline void WriteToHardware(u32 em_address, const T data, Memory::XCheckTLBFlag flag) +template +__forceinline void WriteToHardware(u32 em_address, const T data) { - // First, let's check for FIFO writes, since they are probably the most common - // reason we end up in this function: - if ((em_address & 0xFFFFF000) == 0xCC008000) + int segment = em_address >> 28; + // Quick check for an address that can't meet any of the following conditions, + // to speed up the MMU path. 
+ if (!BitSet32(0xCFC)[segment]) { - switch (sizeof(T)) + // First, let's check for FIFO writes, since they are probably the most common + // reason we end up in this function: + if ((em_address & 0xFFFFF000) == 0xCC008000) { - case 1: GPFifo::Write8((u8)data, em_address); return; - case 2: GPFifo::Write16((u16)data, em_address); return; - case 4: GPFifo::Write32((u32)data, em_address); return; - case 8: GPFifo::Write64((u64)data, em_address); return; - } - } - if ((em_address & 0xC8000000) == 0xC8000000) - { - if (em_address < 0xcc000000) - { - int x = (em_address & 0xfff) >> 2; - int y = (em_address >> 12) & 0x3ff; - - // TODO figure out a way to send data without falling into the template trap - if (em_address & 0x00400000) + switch (sizeof(T)) { - g_video_backend->Video_AccessEFB(POKE_Z, x, y, (u32)data); - DEBUG_LOG(MEMMAP, "EFB Z Write %08x @ %i, %i", (u32)data, x, y); + case 1: GPFifo::Write8((u8)data, em_address); return; + case 2: GPFifo::Write16((u16)data, em_address); return; + case 4: GPFifo::Write32((u32)data, em_address); return; + case 8: GPFifo::Write64((u64)data, em_address); return; + } + } + if ((em_address & 0xC8000000) == 0xC8000000) + { + if (em_address < 0xcc000000) + { + int x = (em_address & 0xfff) >> 2; + int y = (em_address >> 12) & 0x3ff; + + // TODO figure out a way to send data without falling into the template trap + if (em_address & 0x00400000) + { + g_video_backend->Video_AccessEFB(POKE_Z, x, y, (u32)data); + DEBUG_LOG(MEMMAP, "EFB Z Write %08x @ %i, %i", (u32)data, x, y); + } + else + { + g_video_backend->Video_AccessEFB(POKE_COLOR, x, y, (u32)data); + DEBUG_LOG(MEMMAP, "EFB Color Write %08x @ %i, %i", (u32)data, x, y); + } + return; } else { - g_video_backend->Video_AccessEFB(POKE_COLOR, x, y,(u32)data); - DEBUG_LOG(MEMMAP, "EFB Color Write %08x @ %i, %i", (u32)data, x, y); + mmio_mapping->Write(em_address, data); + return; } - return; } - else + else if (segment == 0x8 || segment == 0xC || segment == 0x0) { - 
mmio_mapping->Write(em_address, data); + *(T*)&m_pRAM[em_address & RAM_MASK] = bswap(data); + return; + } + else if (m_pEXRAM && (segment == 0x9 || segment == 0xD || segment == 0x1)) + { + *(T*)&m_pEXRAM[em_address & EXRAM_MASK] = bswap(data); + return; + } + else if (segment == 0xE && (em_address < (0xE0000000 + L1_CACHE_SIZE))) + { + *(T*)&m_pL1Cache[em_address & L1_CACHE_MASK] = bswap(data); return; } } - else if (((em_address & 0xF0000000) == 0x80000000) || - ((em_address & 0xF0000000) == 0xC0000000) || - ((em_address & 0xF0000000) == 0x00000000)) - { - *(T*)&m_pRAM[em_address & RAM_MASK] = bswap(data); - return; - } - else if (m_pEXRAM && (((em_address & 0xF0000000) == 0x90000000) || - ((em_address & 0xF0000000) == 0xD0000000) || - ((em_address & 0xF0000000) == 0x10000000))) - { - *(T*)&m_pEXRAM[em_address & EXRAM_MASK] = bswap(data); - return; - } - else if ((em_address >= 0xE0000000) && (em_address < (0xE0000000+L1_CACHE_SIZE))) - { - *(T*)&m_pL1Cache[em_address & L1_CACHE_MASK] = bswap(data); - return; - } - else if ((bFakeVMEM && ((em_address &0xF0000000) == 0x70000000)) || - (bFakeVMEM && ((em_address &0xF0000000) == 0x40000000))) + + if (bFakeVMEM && (segment == 0x7 || segment == 0x4)) { // fake VMEM *(T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK] = bswap(data); + return; } - else + + // MMU: Do page table translation + u32 tlb_addr = TranslateAddress(em_address); + if (tlb_addr == 0) { - // MMU - // Handle stores that cross page boundaries (ewwww) - if (sizeof(T) > 1 && (em_address & (HW_PAGE_SIZE-1)) > HW_PAGE_SIZE - sizeof(T)) - { - T val = bswap(data); - u32 tlb_addr = TranslateAddress(em_address, flag); - for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++) - { - if (!(addr & (HW_PAGE_SIZE-1))) - tlb_addr = TranslateAddress(addr, flag); - if (tlb_addr == 0) - { - if (flag == FLAG_WRITE) - { - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU) - PanicAlertT("Invalid Write to 0x%08x, PC = 0x%08x ", em_address, PC); - 
else - GenerateDSIException(addr, true); - break; - } - } - else - { - if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000) - { - m_pEXRAM[tlb_addr & EXRAM_MASK] = (u8)val; - val >>= 8; - } - else - { - m_pRAM[tlb_addr & RAM_MASK] = (u8)val; - val >>= 8; - } - } - } - } - else - { - u32 tlb_addr = TranslateAddress(em_address, flag); - if (tlb_addr == 0) - { - if (flag == FLAG_WRITE) - { - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU) - PanicAlertT("Invalid Write to 0x%08x, PC = 0x%08x ", em_address, PC); - else - GenerateDSIException(em_address, true); - } - } - else - { - if (m_pEXRAM && (tlb_addr & 0xF0000000) == 0x10000000) - { - *(T*)&m_pEXRAM[tlb_addr & EXRAM_MASK] = bswap(data); - } - else - { - *(T*)&m_pRAM[tlb_addr & RAM_MASK] = bswap(data); - } - } - } + if (flag == FLAG_WRITE) + GenerateDSIException(em_address, true); + return; } + + // Handle stores that cross page boundaries (ewwww) + if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) && (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T)) + { + T val = bswap(data); + + // We need to check both addresses before writing in case there's a DSI. + u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1); + u32 tlb_addr_next_page = TranslateAddress(em_address_next_page); + if (tlb_addr_next_page == 0) + { + if (flag == FLAG_WRITE) + GenerateDSIException(em_address_next_page, true); + return; + } + for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++, val >>= 8) + { + if (addr == em_address_next_page) + tlb_addr = tlb_addr_next_page; + Memory::base[tlb_addr] = (u8)val; + } + return; + } + + // The easy case! 
+ *(T*)&Memory::base[tlb_addr] = bswap(data); } // ===================== @@ -339,9 +284,9 @@ __forceinline void WriteToHardware(u32 em_address, const T data, Memory::XCheckT static void GenerateISIException(u32 effective_address); -u32 Read_Opcode(u32 _Address) +u32 Read_Opcode(u32 address) { - if (_Address == 0x00000000) + if (address == 0x00000000) { // FIXME use assert? PanicAlert("Program tried to read an opcode from [00000000]. It has crashed."); @@ -349,85 +294,65 @@ u32 Read_Opcode(u32 _Address) } if (SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && - (_Address & ADDR_MASK_MEM1)) + (address & ADDR_MASK_MEM1)) { // TODO: Check for MSR instruction address translation flag before translating - u32 tlb_addr = Memory::TranslateAddress(_Address, FLAG_OPCODE); + u32 tlb_addr = TranslateAddress(address); if (tlb_addr == 0) { - GenerateISIException(_Address); + GenerateISIException(address); return 0; } else { - _Address = tlb_addr; + address = tlb_addr; } } - return PowerPC::ppcState.iCache.ReadInstruction(_Address); + return PowerPC::ppcState.iCache.ReadInstruction(address); } -u8 Read_U8(const u32 _Address) +static __forceinline void Memcheck(u32 address, u32 var, bool write, int size) { - u8 _var = 0; - ReadFromHardware(_var, _Address, FLAG_READ); #ifdef ENABLE_MEM_CHECK - TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address); + TMemCheck *mc = PowerPC::memchecks.GetMemCheck(address); if (mc) { mc->numHits++; - mc->Action(&PowerPC::debug_interface, _var, _Address, false, 1, PC); + mc->Action(&PowerPC::debug_interface, var, address, write, size, PC); } #endif - return (u8)_var; } -u16 Read_U16(const u32 _Address) +u8 Read_U8(const u32 address) { - u16 _var = 0; - ReadFromHardware(_var, _Address, FLAG_READ); -#ifdef ENABLE_MEM_CHECK - TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address); - if (mc) - { - mc->numHits++; - mc->Action(&PowerPC::debug_interface, _var, _Address, false, 2, PC); - } -#endif - return (u16)_var; + u8 var = 
ReadFromHardware(address); + Memcheck(address, var, false, 1); + return (u8)var; } -u32 Read_U32(const u32 _Address) +u16 Read_U16(const u32 address) { - u32 _var = 0; - ReadFromHardware(_var, _Address, FLAG_READ); -#ifdef ENABLE_MEM_CHECK - TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address); - if (mc) - { - mc->numHits++; - mc->Action(&PowerPC::debug_interface, _var, _Address, false, 4, PC); - } -#endif - return _var; + u16 var = ReadFromHardware(address); + Memcheck(address, var, false, 2); + return (u16)var; } -u64 Read_U64(const u32 _Address) +u32 Read_U32(const u32 address) { - u64 _var = 0; - ReadFromHardware(_var, _Address, FLAG_READ); -#ifdef ENABLE_MEM_CHECK - TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address); - if (mc) - { - mc->numHits++; - mc->Action(&PowerPC::debug_interface, (u32)_var, _Address, false, 8, PC); - } -#endif - return _var; + u32 var = ReadFromHardware(address); + Memcheck(address, var, false, 4); + return var; } -double Read_F64(const u32 _Address) +u64 Read_U64(const u32 address) +{ + u64 var = ReadFromHardware(address); + Memcheck(address, (u32)var, false, 8); + return var; +} + +double Read_F64(const u32 address) { union { @@ -435,11 +360,11 @@ double Read_F64(const u32 _Address) double d; } cvt; - cvt.i = Read_U64(_Address); + cvt.i = Read_U64(address); return cvt.d; } -float Read_F32(const u32 _Address) +float Read_F32(const u32 address) { union { @@ -447,158 +372,92 @@ float Read_F32(const u32 _Address) float d; } cvt; - cvt.i = Read_U32(_Address); + cvt.i = Read_U32(address); return cvt.d; } -u32 Read_U8_Val(u32 address, u32 val) +u32 Read_U8_ZX(const u32 address) { - ReadFromHardware(val, address, FLAG_READ); - return val; + return (u32)Read_U8(address); } -u32 Read_S8_Val(u32 address, u32 val) +u32 Read_U16_ZX(const u32 address) { - ReadFromHardware(val, address, FLAG_READ); - return val; + return (u32)Read_U16(address); } -u32 Read_U16_Val(u32 address, u32 val) +void Write_U8(const u8 var, const u32 address) { - 
ReadFromHardware(val, address, FLAG_READ); - return val; + Memcheck(address, var, true, 1); + WriteToHardware(address, var); } -u32 Read_S16_Val(u32 address, u32 val) +void Write_U16(const u16 var, const u32 address) { - ReadFromHardware(val, address, FLAG_READ); - return val; + Memcheck(address, var, true, 2); + WriteToHardware(address, var); } - -u32 Read_U32_Val(u32 address, u32 val) +void Write_U16_Swap(const u16 var, const u32 address) { - ReadFromHardware(val, address, FLAG_READ); - return val; -} - -u64 Read_U64_Val(u32 address, u64 val) -{ - ReadFromHardware(val, address, FLAG_READ); - return val; -} - -u32 Read_U8_ZX(const u32 _Address) -{ - return (u32)Read_U8(_Address); -} - -u32 Read_U16_ZX(const u32 _Address) -{ - return (u32)Read_U16(_Address); -} - -void Write_U8(const u8 _Data, const u32 _Address) -{ -#ifdef ENABLE_MEM_CHECK - TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address); - if (mc) - { - mc->numHits++; - mc->Action(&PowerPC::debug_interface, _Data,_Address,true,1,PC); - } -#endif - WriteToHardware(_Address, _Data, FLAG_WRITE); + Memcheck(address, var, true, 2); + Write_U16(Common::swap16(var), address); } -void Write_U16(const u16 _Data, const u32 _Address) +void Write_U32(const u32 var, const u32 address) { -#ifdef ENABLE_MEM_CHECK - TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address); - if (mc) - { - mc->numHits++; - mc->Action(&PowerPC::debug_interface, _Data,_Address,true,2,PC); - } -#endif - - WriteToHardware(_Address, _Data, FLAG_WRITE); + Memcheck(address, var, true, 4); + WriteToHardware(address, var); } -void Write_U16_Swap(const u16 _Data, const u32 _Address) +void Write_U32_Swap(const u32 var, const u32 address) { - Write_U16(Common::swap16(_Data), _Address); + Memcheck(address, var, true, 4); + Write_U32(Common::swap32(var), address); } - -void Write_U32(const u32 _Data, const u32 _Address) +void Write_U64(const u64 var, const u32 address) { -#ifdef ENABLE_MEM_CHECK - TMemCheck *mc = 
PowerPC::memchecks.GetMemCheck(_Address); - if (mc) - { - mc->numHits++; - mc->Action(&PowerPC::debug_interface, _Data,_Address,true,4,PC); - } -#endif - WriteToHardware(_Address, _Data, FLAG_WRITE); + Memcheck(address, (u32)var, true, 8); + WriteToHardware(address, var); } -void Write_U32_Swap(const u32 _Data, const u32 _Address) +void Write_U64_Swap(const u64 var, const u32 address) { - Write_U32(Common::swap32(_Data), _Address); + Memcheck(address, (u32)var, true, 8); + Write_U64(Common::swap64(var), address); } -void Write_U64(const u64 _Data, const u32 _Address) -{ -#ifdef ENABLE_MEM_CHECK - TMemCheck *mc = PowerPC::memchecks.GetMemCheck(_Address); - if (mc) - { - mc->numHits++; - mc->Action(&PowerPC::debug_interface, (u32)_Data,_Address,true,8,PC); - } -#endif - - WriteToHardware(_Address, _Data, FLAG_WRITE); -} -void Write_U64_Swap(const u64 _Data, const u32 _Address) -{ - Write_U64(Common::swap64(_Data), _Address); -} - -void Write_F64(const double _Data, const u32 _Address) +void Write_F64(const double var, const u32 address) { union { u64 i; double d; } cvt; - cvt.d = _Data; - Write_U64(cvt.i, _Address); + cvt.d = var; + Write_U64(cvt.i, address); } -u8 ReadUnchecked_U8(const u32 _Address) +u8 ReadUnchecked_U8(const u32 address) { - u8 _var = 0; - ReadFromHardware(_var, _Address, FLAG_NO_EXCEPTION); - return _var; + u8 var = ReadFromHardware(address); + return var; } -u32 ReadUnchecked_U32(const u32 _Address) +u32 ReadUnchecked_U32(const u32 address) { - u32 _var = 0; - ReadFromHardware(_var, _Address, FLAG_NO_EXCEPTION); - return _var; + u32 var = ReadFromHardware(address); + return var; } -void WriteUnchecked_U8(const u8 _iValue, const u32 _Address) +void WriteUnchecked_U8(const u8 var, const u32 address) { - WriteToHardware(_Address, _iValue, FLAG_NO_EXCEPTION); + WriteToHardware(address, var); } -void WriteUnchecked_U32(const u32 _iValue, const u32 _Address) +void WriteUnchecked_U32(const u32 var, const u32 address) { - WriteToHardware(_Address, 
_iValue, FLAG_NO_EXCEPTION); + WriteToHardware(address, var); } // ********************************************************************************* @@ -694,14 +553,21 @@ union UPTE2 u32 Hex; }; -static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite) +static void GenerateDSIException(u32 effectiveAddress, bool write) { - if (_bWrite) + // DSI exceptions are only supported in MMU mode. + if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU) + { + PanicAlertT("Invalid %s 0x%08x, PC = 0x%08x ", write ? "Write to" : "Read from", effectiveAddress, PC); + return; + } + + if (write) PowerPC::ppcState.spr[SPR_DSISR] = PPC_EXC_DSISR_PAGE | PPC_EXC_DSISR_STORE; else PowerPC::ppcState.spr[SPR_DSISR] = PPC_EXC_DSISR_PAGE; - PowerPC::ppcState.spr[SPR_DAR] = _EffectiveAddress; + PowerPC::ppcState.spr[SPR_DAR] = effectiveAddress; Common::AtomicOr(PowerPC::ppcState.Exceptions, EXCEPTION_DSI); } @@ -741,111 +607,105 @@ void SDRUpdated() PowerPC::ppcState.pagetable_hashmask = ((xx<<10)|0x3ff); } - -static __forceinline u32 LookupTLBPageAddress(const XCheckTLBFlag _Flag, const u32 vpa, u32 *paddr) +enum TLBLookupResult { - PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[_Flag == FLAG_OPCODE][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK]; - if (tlbe[0].tag == (vpa & ~0xfff) && !(tlbe[0].flags & TLB_FLAG_INVALID)) + TLB_FOUND, + TLB_NOTFOUND, + TLB_UPDATE_C +}; + +static __forceinline TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 vpa, u32 *paddr) +{ + int tag = vpa >> HW_PAGE_INDEX_SHIFT; + PowerPC::tlb_entry *tlbe = &PowerPC::ppcState.tlb[flag == FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK]; + if (tlbe->tag[0] == tag) { // Check if C bit requires updating - if (_Flag == FLAG_WRITE) + if (flag == FLAG_WRITE) { UPTE2 PTE2; - PTE2.Hex = tlbe[0].pte; + PTE2.Hex = tlbe->pte[0]; if (PTE2.C == 0) { PTE2.C = 1; - tlbe[0].pte = PTE2.Hex; - return 0; + tlbe->pte[0] = PTE2.Hex; + return TLB_UPDATE_C; } } - if (_Flag !=
FLAG_NO_EXCEPTION) - { - tlbe[0].flags |= TLB_FLAG_MOST_RECENT; - tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT; - } + if (flag != FLAG_NO_EXCEPTION) + tlbe->recent = 0; - *paddr = tlbe[0].paddr | (vpa & 0xfff); + *paddr = tlbe->paddr[0] | (vpa & 0xfff); - return 1; + return TLB_FOUND; } - if (tlbe[1].tag == (vpa & ~0xfff) && !(tlbe[1].flags & TLB_FLAG_INVALID)) + if (tlbe->tag[1] == tag) { // Check if C bit requires updating - if (_Flag == FLAG_WRITE) + if (flag == FLAG_WRITE) { UPTE2 PTE2; - PTE2.Hex = tlbe[1].pte; + PTE2.Hex = tlbe->pte[1]; if (PTE2.C == 0) { PTE2.C = 1; - tlbe[1].pte = PTE2.Hex; - return 0; + tlbe->pte[1] = PTE2.Hex; + return TLB_UPDATE_C; } } - if (_Flag != FLAG_NO_EXCEPTION) - { - tlbe[1].flags |= TLB_FLAG_MOST_RECENT; - tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT; - } + if (flag != FLAG_NO_EXCEPTION) + tlbe->recent = 1; - *paddr = tlbe[1].paddr | (vpa & 0xfff); + *paddr = tlbe->paddr[1] | (vpa & 0xfff); - return 1; + return TLB_FOUND; } - return 0; + return TLB_NOTFOUND; } -static __forceinline void UpdateTLBEntry(const XCheckTLBFlag _Flag, UPTE2 PTE2, const u32 vpa) +static __forceinline void UpdateTLBEntry(const XCheckTLBFlag flag, UPTE2 PTE2, const u32 address) { - if (_Flag == FLAG_NO_EXCEPTION) + if (flag == FLAG_NO_EXCEPTION) return; - PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[_Flag == FLAG_OPCODE][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK]; - if ((tlbe[0].flags & TLB_FLAG_MOST_RECENT) == 0 || (tlbe[0].flags & TLB_FLAG_INVALID)) - { - tlbe[0].flags = TLB_FLAG_MOST_RECENT; - tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT; - tlbe[0].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT; - tlbe[0].pte = PTE2.Hex; - tlbe[0].tag = vpa & ~0xfff; - } - else - { - tlbe[1].flags = TLB_FLAG_MOST_RECENT; - tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT; - tlbe[1].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT; - tlbe[1].pte = PTE2.Hex; - tlbe[1].tag = vpa & ~0xfff; - } + int tag = address >> HW_PAGE_INDEX_SHIFT; + PowerPC::tlb_entry *tlbe = &PowerPC::ppcState.tlb[flag == 
FLAG_OPCODE][tag & HW_PAGE_INDEX_MASK]; + int index = tlbe->recent == 0 && tlbe->tag[0] != TLB_TAG_INVALID; + tlbe->recent = index; + tlbe->paddr[index] = PTE2.RPN << HW_PAGE_INDEX_SHIFT; + tlbe->pte[index] = PTE2.Hex; + tlbe->tag[index] = tag; } -void InvalidateTLBEntry(u32 vpa) +void InvalidateTLBEntry(u32 address) { - PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[0][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK]; - tlbe[0].flags |= TLB_FLAG_INVALID; - tlbe[1].flags |= TLB_FLAG_INVALID; - PowerPC::tlb_entry *tlbe_i = PowerPC::ppcState.tlb[1][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK]; - tlbe_i[0].flags |= TLB_FLAG_INVALID; - tlbe_i[1].flags |= TLB_FLAG_INVALID; + PowerPC::tlb_entry *tlbe = &PowerPC::ppcState.tlb[0][(address >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK]; + tlbe->tag[0] = TLB_TAG_INVALID; + tlbe->tag[1] = TLB_TAG_INVALID; + PowerPC::tlb_entry *tlbe_i = &PowerPC::ppcState.tlb[1][(address >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK]; + tlbe_i->tag[0] = TLB_TAG_INVALID; + tlbe_i->tag[1] = TLB_TAG_INVALID; } // Page Address Translation -static __forceinline u32 TranslatePageAddress(const u32 _Address, const XCheckTLBFlag _Flag) +static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLBFlag flag) { // TLB cache + // This catches 99%+ of lookups in practice, so the actual page table entry code below doesn't benefit + // much from optimization. 
u32 translatedAddress = 0; - if (LookupTLBPageAddress(_Flag, _Address, &translatedAddress)) + TLBLookupResult res = LookupTLBPageAddress(flag , address, &translatedAddress); + if (res == TLB_FOUND) return translatedAddress; - u32 sr = PowerPC::ppcState.sr[EA_SR(_Address)]; + u32 sr = PowerPC::ppcState.sr[EA_SR(address)]; - u32 offset = EA_Offset(_Address); // 12 bit - u32 page_index = EA_PageIndex(_Address); // 16 bit + u32 offset = EA_Offset(address); // 12 bit + u32 page_index = EA_PageIndex(address); // 16 bit u32 VSID = SR_VSID(sr); // 24 bit - u32 api = EA_API(_Address); // 6 bit (part of page_index) + u32 api = EA_API(address); // 6 bit (part of page_index) // Direct access to the fastmem Arena // FIXME: is this the best idea for clean code? @@ -853,53 +713,44 @@ static __forceinline u32 TranslatePageAddress(const u32 _Address, const XCheckTL // hash function no 1 "xor" .360 u32 hash = (VSID ^ page_index); + u32 pte1 = bswap((VSID << 7) | api | PTE1_V); for (int hash_func = 0; hash_func < 2; hash_func++) { + // hash function no 2 "not" .360 if (hash_func == 1) { - // hash function no 2 "not" .360 hash = ~hash; + pte1 |= PTE1_H << 24; } u32 pteg_addr = ((hash & PowerPC::ppcState.pagetable_hashmask) << 6) | PowerPC::ppcState.pagetable_base; - if ((pteg_addr >> 28) == 1) - base_mem = Memory::m_pEXRAM; - - for (int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++, pteg_addr += 8) { - u32 pte = bswap(*(u32*)&base_mem[pteg_addr]); - bool pteh = (pte & PTE1_H) == 0; - - if (hash_func == 1) - pteh = !pteh; - - if ((pte & PTE1_V) && pteh) + if (pte1 == *(u32*)&base_mem[pteg_addr]) { - if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) + UPTE2 PTE2; + PTE2.Hex = bswap((*(u32*)&base_mem[(pteg_addr + 4)])); + + // set the access bits + switch (flag) { - UPTE2 PTE2; - PTE2.Hex = bswap((*(u32*)&base_mem[(pteg_addr + 4)])); - - // set the access bits - switch (_Flag) - { - case FLAG_NO_EXCEPTION: break; - case FLAG_READ: PTE2.R = 1; break; - case FLAG_WRITE: PTE2.R = 1; 
PTE2.C = 1; break; - case FLAG_OPCODE: PTE2.R = 1; break; - } - - if (_Flag != FLAG_NO_EXCEPTION) - *(u32*)&base_mem[(pteg_addr + 4)] = bswap(PTE2.Hex); - - UpdateTLBEntry(_Flag, PTE2, _Address); - - return (PTE2.RPN << 12) | offset; + case FLAG_NO_EXCEPTION: break; + case FLAG_READ: PTE2.R = 1; break; + case FLAG_WRITE: PTE2.R = 1; PTE2.C = 1; break; + case FLAG_OPCODE: PTE2.R = 1; break; } + + if (flag != FLAG_NO_EXCEPTION) + *(u32*)&base_mem[(pteg_addr + 4)] = bswap(PTE2.Hex); + + // We already updated the TLB entry if this was caused by a C bit. + if (res != TLB_UPDATE_C) + UpdateTLBEntry(flag, PTE2, address); + + return (PTE2.RPN << 12) | offset; } - pteg_addr += 8; } } return 0; @@ -942,7 +793,7 @@ static inline bool CheckAddrBats(const u32 addr, u32* result, u32 batu, u32 spr) } // Block Address Translation -static u32 TranslateBlockAddress(const u32 addr, const XCheckTLBFlag _Flag) +static u32 TranslateBlockAddress(const u32 address, const XCheckTLBFlag flag) { u32 result = 0; UReg_MSR& m_MSR = ((UReg_MSR&)PowerPC::ppcState.msr); @@ -951,21 +802,22 @@ static u32 TranslateBlockAddress(const u32 addr, const XCheckTLBFlag _Flag) // Check for enhanced mode (secondary BAT enable) using 8 BATs bool enhanced_bats = SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && HID4.SBE; - if (_Flag != FLAG_OPCODE) + if (flag != FLAG_OPCODE) { - if (!CheckAddrBats(addr, &result, batu, SPR_DBAT0U) && enhanced_bats) - CheckAddrBats(addr, &result, batu, SPR_DBAT4U); + if (!CheckAddrBats(address, &result, batu, SPR_DBAT0U) && enhanced_bats) + CheckAddrBats(address, &result, batu, SPR_DBAT4U); } else { - if (!CheckAddrBats(addr, &result, batu, SPR_IBAT0U) && enhanced_bats) - CheckAddrBats(addr, &result, batu, SPR_IBAT4U); + if (!CheckAddrBats(address, &result, batu, SPR_IBAT0U) && enhanced_bats) + CheckAddrBats(address, &result, batu, SPR_IBAT4U); } return result; } // Translate effective address using BAT or PAT. Returns 0 if the address cannot be translated. 
-u32 TranslateAddress(const u32 _Address, const XCheckTLBFlag _Flag) +template <const XCheckTLBFlag flag> +u32 TranslateAddress(const u32 address) { // Check MSR[IR] bit before translating instruction addresses. Rogue Leader clears IR and DR?? //if ((_Flag == FLAG_OPCODE) && !(MSR & (1 << (31 - 26)))) return _Address; @@ -977,10 +829,15 @@ u32 TranslateAddress(const u32 _Address, const XCheckTLBFlag _Flag) // so only do it where it's really needed. if (SConfig::GetInstance().m_LocalCoreStartupParameter.bBAT) { - u32 tlb_addr = TranslateBlockAddress(_Address, _Flag); + u32 tlb_addr = TranslateBlockAddress(address, flag); if (tlb_addr) return tlb_addr; } - return TranslatePageAddress(_Address, _Flag); + return TranslatePageAddress(address, flag); } + +template u32 TranslateAddress<FLAG_NO_EXCEPTION>(const u32 address); +template u32 TranslateAddress<FLAG_READ>(const u32 address); +template u32 TranslateAddress<FLAG_WRITE>(const u32 address); +template u32 TranslateAddress<FLAG_OPCODE>(const u32 address); } // namespace diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 99fea369f8..539bb1b84b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -178,11 +178,12 @@ void Jit64::Init() jo.optimizeGatherPipe = true; jo.accurateSinglePrecision = true; js.memcheck = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU; + js.fastmemLoadStore = NULL; gpr.SetEmitter(this); fpr.SetEmitter(this); - trampolines.Init(); + trampolines.Init(js.memcheck ?
TRAMPOLINE_CODE_SIZE_MMU : TRAMPOLINE_CODE_SIZE); AllocCodeSpace(CODE_SIZE); // BLR optimization has the same consequences as block linking, as well as @@ -493,9 +494,10 @@ void Jit64::Jit(u32 em_address) { if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || - blocks.IsFull() || - SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache || - m_clear_cache_asap) + trampolines.GetSpaceLeft() < 0x10000 || + blocks.IsFull() || + SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache || + m_clear_cache_asap) { ClearCache(); } @@ -612,6 +614,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.instructionsLeft = (code_block.m_num_instructions - 1) - i; const GekkoOPInfo *opinfo = ops[i].opinfo; js.downcountAmount += opinfo->numCycles; + js.fastmemLoadStore = NULL; + js.fixupExceptionHandler = false; + js.revertGprLoad = -1; + js.revertFprLoad = -1; if (i == (code_block.m_num_instructions - 1)) { @@ -761,22 +767,37 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc Jit64Tables::CompileInstruction(ops[i]); - // If we have a register that will never be used again, flush it. - for (int j : ~ops[i].gprInUse) - gpr.StoreFromRegister(j); - for (int j : ~ops[i].fprInUse) - fpr.StoreFromRegister(j); - if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) { - TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); - FixupBranch memException = J_CC(CC_NZ, true); + // If we have a fastmem loadstore, we can omit the exception check and let fastmem handle it. 
+ FixupBranch memException; + _assert_msg_(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler), + "Fastmem loadstores shouldn't have exception handler fixups (PC=%x)!", ops[i].address); + if (!js.fastmemLoadStore && !js.fixupExceptionHandler) + { + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); + memException = J_CC(CC_NZ, true); + } SwitchToFarCode(); - SetJumpTarget(memException); + if (!js.fastmemLoadStore) + { + exceptionHandlerAtLoc[js.fastmemLoadStore] = NULL; + SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException); + } + else + { + exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr(); + } - gpr.Flush(FLUSH_MAINTAIN_STATE); - fpr.Flush(FLUSH_MAINTAIN_STATE); + BitSet32 gprToFlush = BitSet32::AllTrue(32); + BitSet32 fprToFlush = BitSet32::AllTrue(32); + if (js.revertGprLoad >= 0) + gprToFlush[js.revertGprLoad] = false; + if (js.revertFprLoad >= 0) + fprToFlush[js.revertFprLoad] = false; + gpr.Flush(FLUSH_MAINTAIN_STATE, gprToFlush); + fpr.Flush(FLUSH_MAINTAIN_STATE, fprToFlush); // If a memory exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. @@ -785,6 +806,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc SwitchToNearCode(); } + // If we have a register that will never be used again, flush it. + for (int j : ~ops[i].gprInUse) + gpr.StoreFromRegister(j); + for (int j : ~ops[i].fprInUse) + fpr.StoreFromRegister(j); + if (opinfo->flags & FL_LOADSTORE) ++jit->js.numLoadStoreInst; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 48b8e5ada3..7da231d70e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -133,6 +133,7 @@ public: // Clobbers RDX. 
void SetCRFieldBit(int field, int bit, Gen::X64Reg in); void ClearCRFieldBit(int field, int bit); + void SetCRFieldBit(int field, int bit); // Generates a branch that will check if a given bit of a CR register part // is set or not. diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 28c2b3fa97..68555e3824 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -226,6 +226,8 @@ void Jit64AsmRoutineManager::GenerateCommon() GenFrsqrte(); fres = AlignCode4(); GenFres(); + mfcr = AlignCode4(); + GenMfcr(); GenQuantizedLoads(); GenQuantizedStores(); diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index f91694ba9e..334c46379e 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -401,7 +401,7 @@ void FPURegCache::StoreRegister(size_t preg, OpArg newLoc) emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg()); } -void RegCache::Flush(FlushMode mode) +void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush) { for (unsigned int i = 0; i < xregs.size(); i++) { @@ -409,7 +409,7 @@ void RegCache::Flush(FlushMode mode) PanicAlert("Someone forgot to unlock X64 reg %u", i); } - for (unsigned int i = 0; i < regs.size(); i++) + for (unsigned int i : regsToFlush) { if (regs[i].locked) { diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h index 3943e83852..0e2f2ea687 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h @@ -81,7 +81,7 @@ public: LockX(reg1); LockX(reg2); } - void Flush(FlushMode mode = FLUSH_ALL); + void Flush(FlushMode mode = FLUSH_ALL, BitSet32 regsToFlush = BitSet32::AllTrue(32)); void Flush(PPCAnalyst::CodeOp *op) {Flush();} int SanityCheck() const; void KillImmediate(size_t preg, bool doLoad, bool makeDirty); diff --git 
a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index bf6f76beda..c322c2248f 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -246,29 +246,41 @@ void Jit64::lXXx(UGeckoInstruction inst) } gpr.Lock(a, b, d); - gpr.BindToRegister(d, js.memcheck, true); - BitSet32 registersInUse = CallerSavedRegistersInUse(); + if (update && storeAddress) + gpr.BindToRegister(a, true, true); + + // A bit of an evil hack here. We need to retain the original value of this register for the + // exception path, but we'd rather not needlessly pass it around if we don't have to, since + // the exception path is very rare. So we store the value in the regcache, let the load path + // clobber it, then restore the value in the exception path. + // TODO: no other load has to do this at the moment, since no other loads go directly to the + // target registers, but if that ever changes, we need to do it there too. + if (js.memcheck) { - // We need to save the (usually scratch) address register for the update. - registersInUse[RSCRATCH2] = true; + gpr.StoreFromRegister(d); + js.revertGprLoad = d; } + gpr.BindToRegister(d, false, true); + + BitSet32 registersInUse = CallerSavedRegistersInUse(); + // We need to save the (usually scratch) address register for the update. 
+ if (update && storeAddress) + registersInUse[RSCRATCH2] = true; + SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); if (update && storeAddress) { - gpr.BindToRegister(a, true, true); - MEMCHECK_START(false) + MemoryExceptionCheck(); MOV(32, gpr.R(a), opAddress); - MEMCHECK_END } // TODO: support no-swap in SafeLoadToReg instead if (byte_reversed) { - MEMCHECK_START(false) + MemoryExceptionCheck(); BSWAP(accessSize, gpr.RX(d)); - MEMCHECK_END } gpr.UnlockAll(); @@ -372,9 +384,8 @@ void Jit64::stX(UGeckoInstruction inst) else { gpr.KillImmediate(a, true, true); - MEMCHECK_START(false) + MemoryExceptionCheck(); ADD(32, gpr.R(a), Imm32((u32)offset)); - MEMCHECK_END } } } @@ -404,9 +415,8 @@ void Jit64::stX(UGeckoInstruction inst) if (update) { - MEMCHECK_START(false) + MemoryExceptionCheck(); ADD(32, gpr.R(a), Imm32((u32)offset)); - MEMCHECK_END } } gpr.UnlockAll(); @@ -425,12 +435,9 @@ void Jit64::stXx(UGeckoInstruction inst) gpr.Lock(a, b, s); if (update) - { gpr.BindToRegister(a, true, true); - ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(RSCRATCH2), gpr.R(a)); - } - else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) + + if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); } @@ -462,7 +469,10 @@ void Jit64::stXx(UGeckoInstruction inst) if (gpr.R(s).IsImm()) { - SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); + BitSet32 registersInUse = CallerSavedRegistersInUse(); + if (update) + registersInUse[RSCRATCH2] = true; + SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, registersInUse, byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); } else { @@ -477,15 +487,16 @@ void Jit64::stXx(UGeckoInstruction inst) gpr.BindToRegister(s, true, false); reg_value = gpr.RX(s); } - SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? 
SAFE_LOADSTORE_NO_SWAP : 0); + BitSet32 registersInUse = CallerSavedRegistersInUse(); + if (update) + registersInUse[RSCRATCH2] = true; + SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, registersInUse, byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); } - if (update && js.memcheck) + if (update) { - // revert the address change if an exception occurred - MEMCHECK_START(true) - SUB(32, gpr.R(a), gpr.R(b)); - MEMCHECK_END; + MemoryExceptionCheck(); + MOV(32, gpr.R(a), R(RSCRATCH2)); } gpr.UnlockAll(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 2a246b3a0b..bc61136a6c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -46,9 +46,9 @@ void Jit64::lfXXX(UGeckoInstruction inst) } else { - addr = R(RSCRATCH); + addr = R(RSCRATCH2); if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) - LEA(32, RSCRATCH, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); else { MOV(32, addr, gpr.R(b)); @@ -65,14 +65,19 @@ void Jit64::lfXXX(UGeckoInstruction inst) offset = (s16)inst.SIMM_16; } + fpr.Lock(d); + if (js.memcheck && single) + { + fpr.StoreFromRegister(d); + js.revertFprLoad = d; + } + fpr.BindToRegister(d, !single); BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update && js.memcheck) registersInUse[RSCRATCH2] = true; SafeLoadToReg(RSCRATCH, addr, single ? 
32 : 64, offset, registersInUse, false); - fpr.Lock(d); - fpr.BindToRegister(d, js.memcheck || !single); - MEMCHECK_START(false) + MemoryExceptionCheck(); if (single) { ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true); @@ -84,7 +89,6 @@ void Jit64::lfXXX(UGeckoInstruction inst) } if (update && js.memcheck) MOV(32, gpr.R(a), addr); - MEMCHECK_END fpr.UnlockAll(); gpr.UnlockAll(); } @@ -141,9 +145,8 @@ void Jit64::stfXXX(UGeckoInstruction inst) else { gpr.KillImmediate(a, true, true); - MEMCHECK_START(false) + MemoryExceptionCheck(); ADD(32, gpr.R(a), Imm32((u32)imm)); - MEMCHECK_END } } fpr.UnlockAll(); @@ -152,48 +155,43 @@ void Jit64::stfXXX(UGeckoInstruction inst) } s32 offset = 0; + if (update) + gpr.BindToRegister(a, true, true); if (indexed) { - if (update) - { - gpr.BindToRegister(a, true, true); - ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(RSCRATCH2), gpr.R(a)); - } + if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) + LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); else { - if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) - LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); - else - { - MOV(32, R(RSCRATCH2), gpr.R(b)); - if (a) - ADD(32, R(RSCRATCH2), gpr.R(a)); - } + MOV(32, R(RSCRATCH2), gpr.R(b)); + if (a) + ADD(32, R(RSCRATCH2), gpr.R(a)); } } else { if (update) { - gpr.BindToRegister(a, true, true); - ADD(32, gpr.R(a), Imm32(imm)); + LEA(32, RSCRATCH2, MDisp(gpr.RX(a), imm)); } else { offset = imm; + MOV(32, R(RSCRATCH2), gpr.R(a)); } - MOV(32, R(RSCRATCH2), gpr.R(a)); } - SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, CallerSavedRegistersInUse()); + BitSet32 registersInUse = CallerSavedRegistersInUse(); + // We need to save the (usually scratch) address register for the update. 
+ if (update) + registersInUse[RSCRATCH2] = true; - if (js.memcheck && update) + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse); + + if (update) { - // revert the address change if an exception occurred - MEMCHECK_START(true) - SUB(32, gpr.R(a), indexed ? gpr.R(b) : Imm32(imm)); - MEMCHECK_END + MemoryExceptionCheck(); + MOV(32, gpr.R(a), R(RSCRATCH2)); } fpr.UnlockAll(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 26a4f022e7..b6dac78f86 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -78,12 +78,11 @@ void Jit64::psq_stXX(UGeckoInstruction inst) if (update && js.memcheck) { - MEMCHECK_START(false) + MemoryExceptionCheck(); if (indexed) ADD(32, gpr.R(a), gpr.R(b)); else ADD(32, gpr.R(a), Imm32((u32)offset)); - MEMCHECK_END } gpr.UnlockAll(); gpr.UnlockAllX(); @@ -137,7 +136,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst) CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8]))); - MEMCHECK_START(false) + MemoryExceptionCheck(); CVTPS2PD(fpr.RX(s), R(XMM0)); if (update && js.memcheck) { @@ -146,7 +145,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst) else ADD(32, gpr.R(a), Imm32((u32)offset)); } - MEMCHECK_END gpr.UnlockAll(); gpr.UnlockAllX(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index af5a187f91..1b3772ff55 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -112,6 +112,41 @@ void Jit64::ClearCRFieldBit(int field, int bit) // We don't need to set bit 32; the cases where that's needed only come up when setting bits, not clearing. 
} +void Jit64::SetCRFieldBit(int field, int bit) +{ + MOV(64, R(RSCRATCH), PPCSTATE(cr_val[field])); + if (bit != CR_GT_BIT) + { + TEST(64, R(RSCRATCH), R(RSCRATCH)); + FixupBranch dont_clear_gt = J_CC(CC_NZ); + BTS(64, R(RSCRATCH), Imm8(63)); + SetJumpTarget(dont_clear_gt); + } + + switch (bit) + { + case CR_SO_BIT: + BTS(64, PPCSTATE(cr_val[field]), Imm8(61)); + break; + + case CR_EQ_BIT: + SHR(64, R(RSCRATCH), Imm8(32)); + SHL(64, R(RSCRATCH), Imm8(32)); + break; + + case CR_GT_BIT: + BTR(64, PPCSTATE(cr_val[field]), Imm8(63)); + break; + + case CR_LT_BIT: + BTS(64, PPCSTATE(cr_val[field]), Imm8(62)); + break; + } + + BTS(64, R(RSCRATCH), Imm8(32)); + MOV(64, PPCSTATE(cr_val[field]), R(RSCRATCH)); +} + FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) { switch (bit) @@ -371,39 +406,12 @@ void Jit64::mfcr(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - // USES_CR int d = inst.RD; + gpr.FlushLockX(RSCRATCH_EXTRA); + CALL((void *)asm_routines.mfcr); + gpr.Lock(d); gpr.BindToRegister(d, false, true); - XOR(32, gpr.R(d), gpr.R(d)); - - X64Reg cr_val = RSCRATCH2; - // we only need to zero the high bits of RSCRATCH once - XOR(32, R(RSCRATCH), R(RSCRATCH)); - for (int i = 0; i < 8; i++) - { - static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9}; - if (i != 0) - SHL(32, gpr.R(d), Imm8(4)); - - MOV(64, R(cr_val), PPCSTATE(cr_val[i])); - - // EQ: Bits 31-0 == 0; set flag bit 1 - TEST(32, R(cr_val), R(cr_val)); - SETcc(CC_Z, R(RSCRATCH)); - LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0)); - - // GT: Value > 0; set flag bit 2 - TEST(64, R(cr_val), R(cr_val)); - SETcc(CC_G, R(RSCRATCH)); - LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0)); - - // SO: Bit 61 set; set flag bit 0 - // LT: Bit 62 set; set flag bit 3 - SHR(64, R(cr_val), Imm8(61)); - MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable)); - OR(32, gpr.R(d), R(RSCRATCH)); - } - + MOV(32, gpr.R(d), R(RSCRATCH)); 
gpr.UnlockAll(); gpr.UnlockAllX(); } @@ -506,6 +514,13 @@ void Jit64::crXXX(UGeckoInstruction inst) return; } + // Special case: crset + if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) + { + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); + return; + } + // TODO(delroth): Potential optimizations could be applied here. For // instance, if the two CR bits being loaded are the same, two loads are // not required. diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 45f910eca0..b30515c101 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -249,7 +249,7 @@ void JitIL::Init() jo.accurateSinglePrecision = false; js.memcheck = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU; - trampolines.Init(); + trampolines.Init(js.memcheck ? TRAMPOLINE_CODE_SIZE_MMU : TRAMPOLINE_CODE_SIZE); AllocCodeSpace(CODE_SIZE); blocks.Init(); asm_routines.Init(nullptr); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index be217f6f7b..0f95402983 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -151,6 +151,44 @@ void CommonAsmRoutines::GenFres() RET(); } +void CommonAsmRoutines::GenMfcr() +{ + // Input: none + // Output: RSCRATCH + // This function clobbers all three RSCRATCH. 
+ X64Reg dst = RSCRATCH; + X64Reg tmp = RSCRATCH2; + X64Reg cr_val = RSCRATCH_EXTRA; + XOR(32, R(dst), R(dst)); + // we only need to zero the high bits of tmp once + XOR(32, R(tmp), R(tmp)); + for (int i = 0; i < 8; i++) + { + static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 }; + if (i != 0) + SHL(32, R(dst), Imm8(4)); + + MOV(64, R(cr_val), PPCSTATE(cr_val[i])); + + // EQ: Bits 31-0 == 0; set flag bit 1 + TEST(32, R(cr_val), R(cr_val)); + // FIXME: is there a better way to do this without the partial register merging? + SETcc(CC_Z, R(tmp)); + LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0)); + + // GT: Value > 0; set flag bit 2 + TEST(64, R(cr_val), R(cr_val)); + SETcc(CC_G, R(tmp)); + LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0)); + + // SO: Bit 61 set; set flag bit 0 + // LT: Bit 62 set; set flag bit 3 + SHR(64, R(cr_val), Imm8(61)); + OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable)); + } + RET(); +} + // Safe + Fast Quantizers, originally from JITIL by magumagu static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 847d318230..34a7232a45 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -25,6 +25,7 @@ public: const u8 *frsqrte; const u8 *fres; + const u8 *mfcr; // In: array index: GQR to use. // In: ECX: Address to read from. 
@@ -58,4 +59,5 @@ public: void GenFifoWrite(int size); void GenFrsqrte(); void GenFres(); + void GenMfcr(); }; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 3a693a3c71..566ce38109 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -73,9 +73,16 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) BitSet32 registersInUse = it->second; + u8* exceptionHandler = NULL; + if (jit->js.memcheck) + { + auto it2 = exceptionHandlerAtLoc.find(codePtr); + if (it2 != exceptionHandlerAtLoc.end()) + exceptionHandler = it2->second; + } + if (!info.isMemoryWrite) { - XEmitter emitter(codePtr); int bswapNopCount; if (info.byteSwap || info.operandSize == 1) bswapNopCount = 0; @@ -101,9 +108,11 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) totalSize += 3; } - const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse); - emitter.CALL((void *)trampoline); + XEmitter emitter(codePtr); int padding = totalSize - BACKPATCH_SIZE; + u8* returnPtr = codePtr + 5 + padding; + const u8* trampoline = trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr); + emitter.JMP(trampoline, true); if (padding > 0) { emitter.NOP(padding); @@ -113,14 +122,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) else { // TODO: special case FIFO writes. Also, support 32-bit mode. 
- auto it2 = pcAtLoc.find(codePtr); - if (it2 == pcAtLoc.end()) + auto it3 = pcAtLoc.find(codePtr); + if (it3 == pcAtLoc.end()) { PanicAlert("BackPatch: no pc entry for address %p", codePtr); return nullptr; } - u32 pc = it2->second; + u32 pc = it3->second; u8 *start; if (info.byteSwap || info.hasImmediate) @@ -154,9 +163,10 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) start = codePtr - bswapSize; } XEmitter emitter(start); - const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, pc); - emitter.CALL((void *)trampoline); - ptrdiff_t padding = (codePtr - emitter.GetCodePtr()) + info.instructionSize; + ptrdiff_t padding = (codePtr - (start + 5)) + info.instructionSize; + u8* returnPtr = start + 5 + padding; + const u8* trampoline = trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc); + emitter.JMP(trampoline, true); if (padding > 0) { emitter.NOP(padding); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 8af315a7fa..cb79f3f511 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -73,6 +73,16 @@ protected: int downcountAmount; u32 numLoadStoreInst; u32 numFloatingPointInst; + // If this is set, we need to generate an exception handler for the fastmem load. + u8* fastmemLoadStore; + // If this is set, a load or store already prepared a jump to the exception handler for us, + // so just fixup that branch instead of testing for a DSI again. + bool fixupExceptionHandler; + Gen::FixupBranch exceptionHandler; + // If these are set, we've stored the old value of a register which will be loaded in revertLoad, + // which lets us revert it on the exception path. 
+ int revertGprLoad; + int revertFprLoad; bool firstFPInstructionFound; bool isLastInstruction; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index ca217ee63d..1209e2bd46 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -13,6 +13,16 @@ using namespace Gen; +void EmuCodeBlock::MemoryExceptionCheck() +{ + if (jit->js.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler) + { + TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); + jit->js.exceptionHandler = J_CC(Gen::CC_NZ, true); + jit->js.fixupExceptionHandler = true; + } +} + void EmuCodeBlock::LoadAndSwap(int size, Gen::X64Reg dst, const Gen::OpArg& src) { if (cpu_info.bMOVBE) @@ -292,10 +302,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, B void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags) { - if (!jit->js.memcheck) - { - registersInUse[reg_value] = false; - } + registersInUse[reg_value] = false; if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && !opAddress.IsImm() && !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) @@ -307,6 +314,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, u8 *mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); registersInUseAtLoc[mov] = registersInUse; + jit->js.fastmemLoadStore = mov; } else { @@ -349,7 +357,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, } ABI_PopRegistersAndAdjustStack(registersInUse, 0); - MEMCHECK_START(false) + MemoryExceptionCheck(); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. 
@@ -359,7 +367,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, { MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); } - MEMCHECK_END } } else @@ -399,7 +406,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, } ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); - MEMCHECK_START(false) + MemoryExceptionCheck(); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. @@ -409,7 +416,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, { MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); } - MEMCHECK_END if (farcode.Enabled()) { @@ -547,8 +553,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces reg_value = FixImmediate(accessSize, reg_value); // TODO: support byte-swapped non-immediate fastmem stores - if (!jit->js.memcheck && - SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && (reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)) #ifdef ENABLE_MEM_CHECK @@ -566,6 +571,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces registersInUseAtLoc[mov] = registersInUse; pcAtLoc[mov] = jit->js.compilerPC; + jit->js.fastmemLoadStore = mov; return; } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 378465120c..9c11937b47 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -12,18 +12,6 @@ namespace MMIO { class Mapping; } -// If inv is true, invert the check (i.e. skip over the associated code if an exception hits, -// instead of skipping over the code if an exception isn't hit). 
-#define MEMCHECK_START(inv) \ - Gen::FixupBranch memException; \ - if (jit->js.memcheck) \ - { TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ - memException = J_CC((inv) ? Gen::CC_Z : Gen::CC_NZ, true); } - -#define MEMCHECK_END \ - if (jit->js.memcheck) \ - SetJumpTarget(memException); - // We offset by 0x80 because the range of one byte memory offsets is // -0x80..0x7f. #define PPCSTATE(x) MDisp(RPPCSTATE, \ @@ -54,6 +42,10 @@ static const int CODE_SIZE = 1024 * 1024 * 32; static const int FARCODE_SIZE = 1024 * 1024 * 8; static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48; +// same for the trampoline code cache, because fastmem results in far more backpatches in MMU mode +static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8; +static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32; + // Like XCodeBlock but has some utilities for memory access. class EmuCodeBlock : public Gen::X64CodeBlock { @@ -61,6 +53,8 @@ public: FarCodeCache farcode; u8* nearcode; // Backed up when we switch to far code. 
+ void MemoryExceptionCheck(); + // Simple functions to switch between near and far code emitting void SwitchToFarCode() { @@ -141,4 +135,5 @@ public: protected: std::unordered_map registersInUseAtLoc; std::unordered_map pcAtLoc; + std::unordered_map exceptionHandlerAtLoc; }; diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp index 2561b436af..63a436511d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp @@ -19,37 +19,22 @@ using namespace Gen; -void TrampolineCache::Init() +void TrampolineCache::Init(int size) { - AllocCodeSpace(8 * 1024 * 1024); + AllocCodeSpace(size); } void TrampolineCache::ClearCodeSpace() { X64CodeBlock::ClearCodeSpace(); - cachedTrampolines.clear(); } void TrampolineCache::Shutdown() { FreeCodeSpace(); - cachedTrampolines.clear(); } -const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse) -{ - TrampolineCacheKey key = { registersInUse, 0, info }; - - auto it = cachedTrampolines.find(key); - if (it != cachedTrampolines.end()) - return it->second; - - const u8* trampoline = GenerateReadTrampoline(info, registersInUse); - cachedTrampolines[key] = trampoline; - return trampoline; -} - -const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse) +const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr) { if (GetSpaceLeft() < 1024) PanicAlert("Trampoline cache full"); @@ -57,57 +42,63 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B const u8* trampoline = GetCodePtr(); X64Reg addrReg = (X64Reg)info.scaledReg; X64Reg dataReg = (X64Reg)info.regOperandReg; - registersInUse[addrReg] = true; - registersInUse[dataReg] = false; + int stack_offset = 0; + bool push_param1 = registersInUse[ABI_PARAM1]; - 
// It's a read. Easy. - // RSP alignment here is 8 due to the call. - ABI_PushRegistersAndAdjustStack(registersInUse, 8); + if (push_param1) + { + PUSH(ABI_PARAM1); + stack_offset = 8; + registersInUse[ABI_PARAM1] = 0; + } int dataRegSize = info.operandSize == 8 ? 64 : 32; - MOVTwo(dataRegSize, ABI_PARAM1, addrReg, ABI_PARAM2, dataReg); - - if (info.displacement) + if (addrReg != ABI_PARAM1 && info.displacement) + LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement)); + else if (addrReg != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R(addrReg)); + else if (info.displacement) ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); + ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset); + switch (info.operandSize) { case 8: - CALL((void *)&Memory::Read_U64_Val); + CALL((void *)&Memory::Read_U64); break; case 4: - CALL((void *)&Memory::Read_U32_Val); + CALL((void *)&Memory::Read_U32); break; case 2: - CALL(info.signExtend ? (void *)&Memory::Read_S16_Val : (void *)&Memory::Read_U16_Val); + CALL((void *)&Memory::Read_U16); break; case 1: - CALL(info.signExtend ? 
(void *)&Memory::Read_S8_Val : (void *)&Memory::Read_U8_Val); + CALL((void *)&Memory::Read_U8); break; } - if (dataReg != ABI_RETURN) - MOV(dataRegSize, R(dataReg), R(ABI_RETURN)); + ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset); - ABI_PopRegistersAndAdjustStack(registersInUse, 8); - RET(); + if (push_param1) + POP(ABI_PARAM1); + + if (exceptionHandler) + { + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); + J_CC(CC_NZ, exceptionHandler); + } + + if (info.signExtend) + MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN)); + else if (dataReg != ABI_RETURN || info.operandSize < 4) + MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN)); + + JMP(returnPtr, true); return trampoline; } -const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc) -{ - TrampolineCacheKey key = { registersInUse, pc, info }; - - auto it = cachedTrampolines.find(key); - if (it != cachedTrampolines.end()) - return it->second; - - const u8* trampoline = GenerateWriteTrampoline(info, registersInUse, pc); - cachedTrampolines[key] = trampoline; - return trampoline; -} - -const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc) +const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc) { if (GetSpaceLeft() < 1024) PanicAlert("Trampoline cache full"); @@ -117,20 +108,23 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, X64Reg dataReg = (X64Reg)info.regOperandReg; X64Reg addrReg = (X64Reg)info.scaledReg; - // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a - // hardware access - we can take shortcuts. // Don't treat FIFO writes specially for now because they require a burst // check anyway. 
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(pc)); - ABI_PushRegistersAndAdjustStack(registersInUse, 8); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); if (info.hasImmediate) { - if (addrReg != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(addrReg)); + if (addrReg != ABI_PARAM2 && info.displacement) + LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement)); + else if (addrReg != ABI_PARAM2) + MOV(32, R(ABI_PARAM2), R(addrReg)); + else if (info.displacement) + ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); + // we have to swap back the immediate to pass it to the write functions switch (info.operandSize) { @@ -150,11 +144,8 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, } else { - MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg); - } - if (info.displacement) - { - ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); + int dataRegSize = info.operandSize == 8 ? 64 : 32; + MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg); } switch (info.operandSize) @@ -173,31 +164,13 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, break; } - ABI_PopRegistersAndAdjustStack(registersInUse, 8); - RET(); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); + if (exceptionHandler) + { + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); + J_CC(CC_NZ, exceptionHandler); + } + JMP(returnPtr, true); return trampoline; } - -size_t TrampolineCacheKeyHasher::operator()(const TrampolineCacheKey& k) const -{ - size_t res = std::hash()(k.registersInUse.m_val); - res ^= std::hash()(k.info.operandSize) >> 1; - res ^= std::hash()(k.info.regOperandReg) >> 2; - res ^= std::hash()(k.info.scaledReg) >> 3; - res ^= std::hash()(k.info.immediate) >> 4; - res ^= std::hash()(k.pc) >> 5; - res ^= std::hash()(k.info.displacement) << 1; - res ^= std::hash()(k.info.signExtend) << 2; - res ^= std::hash()(k.info.hasImmediate) << 
3; - res ^= std::hash()(k.info.isMemoryWrite) << 4; - - return res; -} - -bool TrampolineCacheKey::operator==(const TrampolineCacheKey &other) const -{ - return pc == other.pc && - registersInUse == other.registersInUse && - info == other.info; -} diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h index 16e293bce0..305ab2389a 100644 --- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h @@ -14,33 +14,13 @@ // We need at least this many bytes for backpatching. const int BACKPATCH_SIZE = 5; -struct TrampolineCacheKey -{ - BitSet32 registersInUse; - u32 pc; - InstructionInfo info; - - bool operator==(const TrampolineCacheKey &other) const; -}; - -struct TrampolineCacheKeyHasher -{ - size_t operator()(const TrampolineCacheKey& k) const; -}; - class TrampolineCache : public Gen::X64CodeBlock { public: - void Init(); + void Init(int size); void Shutdown(); - const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse); - const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc); + const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr); + const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc); void ClearCodeSpace(); - -private: - const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse); - const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u32 pc); - - std::unordered_map cachedTrampolines; }; diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 86c17653d5..87cb0c6d43 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -211,7 +211,7 @@ namespace JitInterface { if (bMMU && 
!bFakeVMEM && (_Address & Memory::ADDR_MASK_MEM1)) { - _Address = Memory::TranslateAddress(_Address, Memory::FLAG_OPCODE); + _Address = Memory::TranslateAddress(_Address); if (_Address == 0) { return 0; diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 8cb1e5b383..5a1f859bf2 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -649,7 +649,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 bool virtualAddr = SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && (address & JIT_ICACHE_VMEM_BIT); if (virtualAddr) { - if (!Memory::TranslateAddress(address, Memory::FLAG_NO_EXCEPTION)) + if (!Memory::TranslateAddress(address)) { // Memory exception occurred during instruction fetch block->m_memory_exception = true; @@ -670,6 +670,15 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 if (inst.hex != 0) { + // Slight hack: the JIT block cache currently assumes all blocks end at the same place, + // but broken blocks due to page faults break this assumption. Avoid this by just ending + // all virtual memory instruction blocks at page boundaries. + // FIXME: improve the JIT block cache so we don't need to do this. 
+ if (virtualAddr && i > 0 && (address & 0xfff) == 0) + { + break; + } + num_inst++; memset(&code[i], 0, sizeof(CodeOp)); GekkoOPInfo *opinfo = GetOpInfo(inst); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 35f96fb495..5f8bc9de00 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -125,12 +125,12 @@ void Init(int cpu_core) { for (int set = 0; set < 64; set++) { + ppcState.tlb[tlb][set].recent = 0; for (int way = 0; way < 2; way++) { - ppcState.tlb[tlb][set][way].flags = TLB_FLAG_INVALID; - ppcState.tlb[tlb][set][way].paddr = 0; - ppcState.tlb[tlb][set][way].pte = 0; - ppcState.tlb[tlb][set][way].tag = 0; + ppcState.tlb[tlb][set].paddr[way] = 0; + ppcState.tlb[tlb][set].pte[way] = 0; + ppcState.tlb[tlb][set].tag[way] = TLB_TAG_INVALID; } } } diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 69eb0da28b..bb6d418065 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -29,22 +29,21 @@ enum CoreMode // TLB cache #define TLB_SIZE 128 -#define TLB_WAYS 2 #define NUM_TLBS 2 +#define TLB_WAYS 2 #define HW_PAGE_INDEX_SHIFT 12 #define HW_PAGE_INDEX_MASK 0x3f #define HW_PAGE_TAG_SHIFT 18 -#define TLB_FLAG_MOST_RECENT 0x01 -#define TLB_FLAG_INVALID 0x02 +#define TLB_TAG_INVALID 0xffffffff struct tlb_entry { - u32 tag; - u32 paddr; - u32 pte; - u8 flags; + u32 tag[TLB_WAYS]; + u32 paddr[TLB_WAYS]; + u32 pte[TLB_WAYS]; + u8 recent; }; // This contains the entire state of the emulated PowerPC "Gekko" CPU. @@ -107,7 +106,7 @@ struct GC_ALIGNED64(PowerPCState) // also for power management, but we don't care about that. 
u32 spr[1024]; - tlb_entry tlb[NUM_TLBS][TLB_SIZE / TLB_WAYS][TLB_WAYS]; + tlb_entry tlb[NUM_TLBS][TLB_SIZE / TLB_WAYS]; u32 pagetable_base; u32 pagetable_hashmask; diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index d156bb4adc..a63fafc2a5 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -64,7 +64,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 37; +static const u32 STATE_VERSION = 38; enum {