From e97d3804373b31724d0f092d8132e5ba23dec4a5 Mon Sep 17 00:00:00 2001 From: TheLordScruffy Date: Mon, 17 Oct 2022 15:28:29 -0400 Subject: [PATCH 1/5] Implement PPC write-back data cache --- Source/Core/Core/Boot/Boot_BS2Emu.cpp | 6 + Source/Core/Core/Config/MainSettings.cpp | 1 + Source/Core/Core/Config/MainSettings.h | 1 + .../Core/ConfigLoaders/IsSettingSaveable.cpp | 1 + .../Interpreter/Interpreter_LoadStore.cpp | 78 +++-- .../Interpreter_SystemRegisters.cpp | 23 ++ .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 2 + .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 5 + .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 3 + .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 2 + .../JitArm64/JitArm64_SystemRegisters.cpp | 9 + .../Core/Core/PowerPC/JitCommon/JitBase.cpp | 7 + Source/Core/Core/PowerPC/JitCommon/JitBase.h | 1 + Source/Core/Core/PowerPC/MMU.cpp | 141 +++++++- Source/Core/Core/PowerPC/MMU.h | 5 + Source/Core/Core/PowerPC/PPCCache.cpp | 317 +++++++++++++----- Source/Core/Core/PowerPC/PPCCache.h | 46 ++- Source/Core/Core/PowerPC/PowerPC.cpp | 18 + Source/Core/Core/PowerPC/PowerPC.h | 2 + Source/Core/Core/State.cpp | 10 +- .../Core/DolphinQt/Settings/AdvancedPane.cpp | 12 + Source/Core/DolphinQt/Settings/AdvancedPane.h | 1 + 22 files changed, 559 insertions(+), 132 deletions(-) diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index 5be2905096..f907b85647 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -190,6 +190,12 @@ bool CBoot::RunApploader(bool is_wii, const DiscIO::VolumeDisc& volume, INFO_LOG_FMT(BOOT, "DVDRead: offset: {:08x} memOffset: {:08x} length: {}", dvd_offset, ram_address, length); DVDRead(volume, dvd_offset, ram_address, length, partition); + for (u32 i = 0; i < length; i += 32) + { + if (PowerPC::ppcState.m_enable_dcache) + PowerPC::ppcState.dCache.Invalidate(ram_address + i); + PowerPC::ppcState.iCache.Invalidate(ram_address + i); + } DiscIO::Riivolution::ApplyApploaderMemoryPatches(riivolution_patches, ram_address, length); diff --git a/Source/Core/Core/Config/MainSettings.cpp b/Source/Core/Core/Config/MainSettings.cpp index c88849a795..a6f3a890aa 100644 --- a/Source/Core/Core/Config/MainSettings.cpp +++ b/Source/Core/Core/Config/MainSettings.cpp @@ -37,6 +37,7 @@ const Info MAIN_CPU_CORE{{System::Main, "Core", "CPUCore"}, PowerPC::DefaultCPUCore()}; const Info MAIN_JIT_FOLLOW_BRANCH{{System::Main, "Core", "JITFollowBranch"}, true}; const Info MAIN_FASTMEM{{System::Main, "Core", "Fastmem"}, true}; +const Info MAIN_ACCURATE_CPU_CACHE{{System::Main, "Core", "AccurateCPUCache"}, false}; const Info MAIN_DSP_HLE{{System::Main, "Core", "DSPHLE"}, true}; const Info MAIN_TIMING_VARIANCE{{System::Main, "Core", "TimingVariance"}, 40}; const Info MAIN_CPU_THREAD{{System::Main, "Core", "CPUThread"}, true}; diff --git a/Source/Core/Core/Config/MainSettings.h b/Source/Core/Core/Config/MainSettings.h index 0730681f2d..92b909adf5 100644 --- a/Source/Core/Core/Config/MainSettings.h +++ b/Source/Core/Core/Config/MainSettings.h @@ -55,6 +55,7 @@ extern const Info MAIN_SKIP_IPL; extern const Info MAIN_CPU_CORE; extern const Info MAIN_JIT_FOLLOW_BRANCH; extern const Info MAIN_FASTMEM; +extern const Info MAIN_ACCURATE_CPU_CACHE; // Should really be in the DSP section, but we're kind of stuck with bad decisions made in the past. extern const Info MAIN_DSP_HLE; extern const Info MAIN_TIMING_VARIANCE; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index 9c50002204..acea1f5e7f 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -127,6 +127,7 @@ bool IsSettingSaveable(const Config::Location& config_location) &Config::MAIN_CPU_THREAD.GetLocation(), &Config::MAIN_MMU.GetLocation(), &Config::MAIN_PAUSE_ON_PANIC.GetLocation(), + &Config::MAIN_ACCURATE_CPU_CACHE.GetLocation(), &Config::MAIN_BB_DUMP_PORT.GetLocation(), &Config::MAIN_SYNC_GPU.GetLocation(), &Config::MAIN_SYNC_GPU_MAX_DISTANCE.GetLocation(), diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 88f9f9997c..401e98e8f1 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -438,14 +438,17 @@ void Interpreter::dcba(UGeckoInstruction inst) void Interpreter::dcbf(UGeckoInstruction inst) { - // TODO: Implement some sort of L2 emulation. - // TODO: Raise DSI if translation fails (except for direct-store segments). - - // Invalidate the JIT cache here as a heuristic to compensate for - // the lack of precise L1 icache emulation in the JIT. (Portable software - // should use icbi consistently, but games aren't portable.) const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - JitInterface::InvalidateICacheLine(address); + if (!PowerPC::ppcState.m_enable_dcache) + { + // Invalidate the JIT cache here as a heuristic to compensate for + // the lack of precise L1 icache emulation in the JIT. (Portable software + // should use icbi consistently, but games aren't portable.) + JitInterface::InvalidateICacheLine(address); + return; + } + + PowerPC::FlushCacheLine(address); } void Interpreter::dcbi(UGeckoInstruction inst) @@ -456,42 +459,44 @@ void Interpreter::dcbi(UGeckoInstruction inst) return; } - // TODO: Implement some sort of L2 emulation. - // TODO: Raise DSI if translation fails (except for direct-store segments). - - // Invalidate the JIT cache here as a heuristic to compensate for - // the lack of precise L1 icache emulation in the JIT. (Portable software - // should use icbi consistently, but games aren't portable.) const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - JitInterface::InvalidateICacheLine(address); + if (!PowerPC::ppcState.m_enable_dcache) + { + // Invalidate the JIT cache here as a heuristic to compensate for + // the lack of precise L1 icache emulation in the JIT. (Portable software + // should use icbi consistently, but games aren't portable.) + JitInterface::InvalidateICacheLine(address); + return; + } + + PowerPC::InvalidateCacheLine(address); } void Interpreter::dcbst(UGeckoInstruction inst) { - // TODO: Implement some sort of L2 emulation. - // TODO: Raise DSI if translation fails (except for direct-store segments). - - // Invalidate the JIT cache here as a heuristic to compensate for - // the lack of precise L1 icache emulation in the JIT. (Portable software - // should use icbi consistently, but games aren't portable.) const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - JitInterface::InvalidateICacheLine(address); + if (!PowerPC::ppcState.m_enable_dcache) + { + // Invalidate the JIT cache here as a heuristic to compensate for + // the lack of precise L1 icache emulation in the JIT. (Portable software + // should use icbi consistently, but games aren't portable.) + JitInterface::InvalidateICacheLine(address); + return; + } + + PowerPC::StoreCacheLine(address); } +// These instructions hint that it might be optimal to prefetch the specified cache line into the +// data cache. But the CPU is never guaranteed to do this fetch, and in practice it's not more +// performant to emulate it. + void Interpreter::dcbt(UGeckoInstruction inst) { - if (HID0.NOOPTI) - return; - - // TODO: Implement some sort of L2 emulation. } void Interpreter::dcbtst(UGeckoInstruction inst) { - if (HID0.NOOPTI) - return; - - // TODO: Implement some sort of L2 emulation. } void Interpreter::dcbz(UGeckoInstruction inst) @@ -504,14 +509,17 @@ void Interpreter::dcbz(UGeckoInstruction inst) return; } - // Hack to stop dcbz/dcbi over low MEM1 trashing memory. - if ((dcbz_addr < 0x80008000) && (dcbz_addr >= 0x80000000) && - Config::Get(Config::MAIN_LOW_DCBZ_HACK)) + if (!PowerPC::ppcState.m_enable_dcache) { - return; + // Hack to stop dcbz/dcbi over low MEM1 trashing memory. This is not needed if data cache + // emulation is enabled. + if ((dcbz_addr < 0x80008000) && (dcbz_addr >= 0x80000000) && + Config::Get(Config::MAIN_LOW_DCBZ_HACK)) + { + return; + } } - // TODO: Implement some sort of L2 emulation. PowerPC::ClearCacheLine(dcbz_addr & (~31)); } @@ -531,7 +539,6 @@ void Interpreter::dcbz_l(UGeckoInstruction inst) return; } - // FAKE: clear memory instead of clearing the cache block PowerPC::ClearCacheLine(address & (~31)); } @@ -587,6 +594,7 @@ void Interpreter::icbi(UGeckoInstruction inst) { // TODO: Raise DSI if translation fails (except for direct-store segments). const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); + JitInterface::InvalidateICacheLine(address); PowerPC::ppcState.iCache.Invalidate(address); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 8699f05b75..6e01e1ecae 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -250,9 +250,32 @@ void Interpreter::mfspr(UGeckoInstruction inst) rSPR(index) &= ~1; } break; + case SPR_XER: rSPR(index) = PowerPC::GetXER().Hex; break; + + case SPR_UPMC1: + rSPR(index) = rSPR(SPR_PMC1); + break; + + case SPR_UPMC2: + rSPR(index) = rSPR(SPR_PMC2); + break; + + case SPR_UPMC3: + rSPR(index) = rSPR(SPR_PMC3); + break; + + case SPR_UPMC4: + rSPR(index) = rSPR(SPR_PMC4); + break; + + case SPR_IABR: + // A strange quirk: reading back this register on hardware will always have this bit set to 0 + // (despite the bit appearing to function normally when set). This does not apply to the DABR. + rGPR[inst.RD] = rSPR(index) & ~1; + return; } rGPR[inst.RD] = rSPR(index); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 046a0d6d94..969eb1bf16 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -229,6 +229,8 @@ void Jit64::lXXx(UGeckoInstruction inst) void Jit64::dcbx(UGeckoInstruction inst) { + FALLBACK_IF(m_accurate_cpu_cache_enabled); + INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 3629e2deec..d74c1bfbc2 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -412,6 +412,11 @@ void Jit64::mfspr(UGeckoInstruction inst) case SPR_PMC2: case SPR_PMC3: case SPR_PMC4: + case SPR_UPMC1: + case SPR_UPMC2: + case SPR_UPMC3: + case SPR_UPMC4: + case SPR_IABR: FALLBACK_IF(true); default: { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 732094e3c1..386c0fb69b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -61,6 +61,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, { const u32 access_size = BackPatchInfo::GetFlagSize(flags); + if (m_accurate_cpu_cache_enabled) + mode = MemAccessMode::AlwaysSafe; + const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe; const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 6642cec959..63b97fca68 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -625,6 +625,8 @@ void JitArm64::stmw(UGeckoInstruction inst) void JitArm64::dcbx(UGeckoInstruction inst) { + FALLBACK_IF(m_accurate_cpu_cache_enabled); + INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index cc95654b76..6a626f5aef 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -395,6 +395,15 @@ void JitArm64::mfspr(UGeckoInstruction inst) break; case SPR_WPAR: case SPR_DEC: + case SPR_PMC1: + case SPR_PMC2: + case SPR_PMC3: + case SPR_PMC4: + case SPR_UPMC1: + case SPR_UPMC2: + case SPR_UPMC3: + case SPR_UPMC4: + case SPR_IABR: FALLBACK_IF(true); default: gpr.BindToRegister(d, false); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index 679fdea0a8..27bdf3bb13 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -58,6 +58,13 @@ void JitBase::RefreshConfig() m_fastmem_enabled = Config::Get(Config::MAIN_FASTMEM); m_mmu_enabled = Core::System::GetInstance().IsMMUMode(); m_pause_on_panic_enabled = Core::System::GetInstance().IsPauseOnPanicMode(); + m_accurate_cpu_cache_enabled = Config::Get(Config::MAIN_ACCURATE_CPU_CACHE); + if (m_accurate_cpu_cache_enabled) + { + m_fastmem_enabled = false; + // This hack is unneeded if the data cache is being emulated. + m_low_dcbz_hack = false; + } analyzer.SetDebuggingEnabled(m_enable_debugging); analyzer.SetBranchFollowingEnabled(Config::Get(Config::MAIN_JIT_FOLLOW_BRANCH)); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 99c4d67485..ad218ed8a3 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -136,6 +136,7 @@ protected: bool m_fastmem_enabled = false; bool m_mmu_enabled = false; bool m_pause_on_panic_enabled = false; + bool m_accurate_cpu_cache_enabled = false; void RefreshConfig(); diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 4e44f8c5bf..1ea8397f96 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -187,6 +187,8 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address) return static_cast(var); } + bool wi = false; + if (!never_translate && MSR.DR) { auto translated_addr = TranslateAddress(em_address); @@ -197,6 +199,7 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address) return 0; } em_address = translated_addr.address; + wi = translated_addr.wi; } if (flag == XCheckTLBFlag::Read && (em_address & 0xF8000000) == 0x08000000) @@ -221,7 +224,18 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address) // Handle RAM; the masking intentionally discards bits (essentially creating // mirrors of memory). T value; - std::memcpy(&value, &memory.GetRAM()[em_address & memory.GetRamMask()], sizeof(T)); + em_address &= memory.GetRamMask(); + + if (!ppcState.m_enable_dcache || wi) + { + std::memcpy(&value, &memory.GetRAM()[em_address], sizeof(T)); + } + else + { + ppcState.dCache.Read(em_address, &value, sizeof(T), + HID0.DLOCK || flag != XCheckTLBFlag::Read); + } + return bswap(value); } @@ -229,7 +243,18 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address) (em_address & 0x0FFFFFFF) < memory.GetExRamSizeReal()) { T value; - std::memcpy(&value, &memory.GetEXRAM()[em_address & 0x0FFFFFFF], sizeof(T)); + em_address &= 0x0FFFFFFF; + + if (!ppcState.m_enable_dcache || wi) + { + std::memcpy(&value, &memory.GetEXRAM()[em_address], sizeof(T)); + } + else + { + ppcState.dCache.Read(em_address + 0x10000000, &value, sizeof(T), + HID0.DLOCK || flag != XCheckTLBFlag::Read); + } + return bswap(value); } @@ -391,14 +416,28 @@ static void WriteToHardware(Memory::MemoryManager& memory, u32 em_address, const { // Handle RAM; the masking intentionally discards bits (essentially creating // mirrors of memory). - std::memcpy(&memory.GetRAM()[em_address & memory.GetRamMask()], &swapped_data, size); + em_address &= memory.GetRamMask(); + + if (ppcState.m_enable_dcache && !wi) + ppcState.dCache.Write(em_address, &swapped_data, size, HID0.DLOCK); + + if (!ppcState.m_enable_dcache || wi || flag != XCheckTLBFlag::Write) + std::memcpy(&memory.GetRAM()[em_address], &swapped_data, size); + return; } if (memory.GetEXRAM() && (em_address >> 28) == 0x1 && (em_address & 0x0FFFFFFF) < memory.GetExRamSizeReal()) { - std::memcpy(&memory.GetEXRAM()[em_address & 0x0FFFFFFF], &swapped_data, size); + em_address &= 0x0FFFFFFF; + + if (ppcState.m_enable_dcache && !wi) + ppcState.dCache.Write(em_address + 0x10000000, &swapped_data, size, HID0.DLOCK); + + if (!ppcState.m_enable_dcache || wi || flag != XCheckTLBFlag::Write) + std::memcpy(&memory.GetEXRAM()[em_address], &swapped_data, size); + return; } @@ -1129,6 +1168,100 @@ void ClearCacheLine(u32 address) WriteToHardware(memory, address + i, 0, 4); } +void StoreCacheLine(u32 address) +{ + address &= ~0x1F; + + if (MSR.DR) + { + auto translated_address = TranslateAddress(address); + if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT) + { + return; + } + if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT) + { + // If translation fails, generate a DSI. + GenerateDSIException(address, true); + return; + } + address = translated_address.address; + } + + if (ppcState.m_enable_dcache) + ppcState.dCache.Store(address); +} + +void InvalidateCacheLine(u32 address) +{ + address &= ~0x1F; + + if (MSR.DR) + { + auto translated_address = TranslateAddress(address); + if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT) + { + return; + } + if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT) + { + return; + } + address = translated_address.address; + } + + if (ppcState.m_enable_dcache) + ppcState.dCache.Invalidate(address); +} + +void FlushCacheLine(u32 address) +{ + address &= ~0x1F; + + if (MSR.DR) + { + auto translated_address = TranslateAddress(address); + if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT) + { + return; + } + if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT) + { + // If translation fails, generate a DSI. + GenerateDSIException(address, true); + return; + } + address = translated_address.address; + } + + if (ppcState.m_enable_dcache) + ppcState.dCache.Flush(address); +} + +void TouchCacheLine(u32 address, bool store) +{ + address &= ~0x1F; + + if (MSR.DR) + { + auto translated_address = TranslateAddress(address); + if (translated_address.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT) + { + return; + } + if (translated_address.result == TranslateAddressResultEnum::PAGE_FAULT) + { + // If translation fails, generate a DSI. + GenerateDSIException(address, true); + return; + } + address = translated_address.address; + } + + if (ppcState.m_enable_dcache) + ppcState.dCache.Touch(address, store); +} + u32 IsOptimizableMMIOAccess(u32 address, u32 access_size) { if (PowerPC::memchecks.HasAny()) diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 6eda9a22b7..bfb855478d 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -164,7 +164,12 @@ void Write_F64(double var, u32 address); void DMA_LCToMemory(u32 mem_address, u32 cache_address, u32 num_blocks); void DMA_MemoryToLC(u32 cache_address, u32 mem_address, u32 num_blocks); + void ClearCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned +void StoreCacheLine(u32 address); +void InvalidateCacheLine(u32 address); +void FlushCacheLine(u32 address); +void TouchCacheLine(u32 address, bool store); // TLB functions void SDRUpdated(); diff --git a/Source/Core/Core/PowerPC/PPCCache.cpp b/Source/Core/Core/PowerPC/PPCCache.cpp index 89b85a60db..acdcfaf0c5 100644 --- a/Source/Core/Core/PowerPC/PPCCache.cpp +++ b/Source/Core/Core/PowerPC/PPCCache.cpp @@ -94,134 +94,270 @@ InstructionCache::~InstructionCache() Config::RemoveConfigChangedCallback(*m_config_callback_id); } -void InstructionCache::Reset() +void Cache::Reset() { valid.fill(0); plru.fill(0); + wrote.fill(0); lookup_table.fill(0xFF); lookup_table_ex.fill(0xFF); lookup_table_vmem.fill(0xFF); +} + +void InstructionCache::Reset() +{ + Cache::Reset(); JitInterface::ClearSafe(); } +void Cache::Init() +{ + data.fill({}); + tags.fill({}); + addrs.fill({}); + Reset(); +} + void InstructionCache::Init() { if (!m_config_callback_id) m_config_callback_id = Config::AddConfigChangedCallback([this] { RefreshConfig(); }); RefreshConfig(); - data.fill({}); - tags.fill({}); - Reset(); + Cache::Init(); } -void InstructionCache::Invalidate(u32 addr) -{ - if (!HID0.ICE || m_disable_icache) - return; - - // Invalidates the whole set - const u32 set = (addr >> 5) & 0x7f; - for (size_t i = 0; i < 8; i++) - { - if (valid[set] & (1U << i)) - { - if (tags[set][i] & (ICACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][i] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][i] & (ICACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][i] << 7) | set) & 0x1fffff] = 0xff; - else - lookup_table[((tags[set][i] << 7) | set) & 0xfffff] = 0xff; - } - } - valid[set] = 0; - JitInterface::InvalidateICacheLine(addr); -} - -u32 InstructionCache::ReadInstruction(u32 addr) +void Cache::Store(u32 addr) { auto& system = Core::System::GetInstance(); auto& memory = system.GetMemory(); - if (!HID0.ICE || m_disable_icache) // instruction cache is disabled - return memory.Read_U32(addr); - u32 set = (addr >> 5) & 0x7f; - u32 tag = addr >> 12; + auto [set, way] = GetCache(addr, true); - u32 t; - if (addr & ICACHE_VMEM_BIT) + if (way == 0xff) + return; + + if (valid[set] & (1U << way) && wrote[set] & (1U << way)) + memory.CopyToEmu((addr & ~0x1f), reinterpret_cast(data[set][way].data()), 32); + wrote[set] &= ~(1U << way); +} + +void Cache::FlushAll() +{ + auto& system = Core::System::GetInstance(); + auto& memory = system.GetMemory(); + + for (size_t set = 0; set < CACHE_SETS; set++) { - t = lookup_table_vmem[(addr >> 5) & 0xfffff]; + for (size_t way = 0; way < CACHE_WAYS; way++) + { + if (valid[set] & (1U << way) && wrote[set] & (1U << way)) + memory.CopyToEmu(addrs[set][way], reinterpret_cast(data[set][way].data()), 32); + } } - else if (addr & ICACHE_EXRAM_BIT) + + Reset(); +} + +void Cache::Invalidate(u32 addr) +{ + auto [set, way] = GetCache(addr, true); + + if (way == 0xff) + return; + + if (valid[set] & (1U << way)) { - t = lookup_table_ex[(addr >> 5) & 0x1fffff]; + if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) + lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) + lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + else + lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + + valid[set] &= ~(1U << way); + wrote[set] &= ~(1U << way); + } +} + +void Cache::Flush(u32 addr) +{ + auto& system = Core::System::GetInstance(); + auto& memory = system.GetMemory(); + + auto [set, way] = GetCache(addr, true); + + if (way == 0xff) + return; + + if (valid[set] & (1U << way)) + { + if (wrote[set] & (1U << way)) + memory.CopyToEmu((addr & ~0x1f), reinterpret_cast(data[set][way].data()), 32); + + if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) + lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) + lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + else + lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + + valid[set] &= ~(1U << way); + wrote[set] &= ~(1U << way); + } +} + +void Cache::Touch(u32 addr, bool store) +{ + GetCache(addr, false); +} + +std::pair Cache::GetCache(u32 addr, bool locked) +{ + auto& system = Core::System::GetInstance(); + auto& memory = system.GetMemory(); + + addr &= ~31; + u32 set = (addr >> 5) & 0x7f; + u32 way; + + if (addr & CACHE_VMEM_BIT) + { + way = lookup_table_vmem[(addr >> 5) & 0xfffff]; + } + else if (addr & CACHE_EXRAM_BIT) + { + way = lookup_table_ex[(addr >> 5) & 0x1fffff]; } else { - t = lookup_table[(addr >> 5) & 0xfffff]; + way = lookup_table[(addr >> 5) & 0xfffff]; } - if (t == 0xff) // load to the cache + // load to the cache + if (!locked && way == 0xff) { - if (HID0.ILOCK) // instruction cache is locked - return memory.Read_U32(addr); + u32 tag = addr >> 12; + // select a way if (valid[set] != 0xff) - t = s_way_from_valid[valid[set]]; + way = s_way_from_valid[valid[set]]; else - t = s_way_from_plru[plru[set]]; - // load - memory.CopyFromEmu(reinterpret_cast(data[set][t].data()), (addr & ~0x1f), 32); - if (valid[set] & (1 << t)) + way = s_way_from_plru[plru[set]]; + + if (valid[set] & (1 << way)) { - if (tags[set][t] & (ICACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][t] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][t] & (ICACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][t] << 7) | set) & 0x1fffff] = 0xff; + // store the cache back to main memory + if (wrote[set] & (1 << way)) + memory.CopyToEmu(addrs[set][way], reinterpret_cast(data[set][way].data()), 32); + + if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) + lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) + lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; else - lookup_table[((tags[set][t] << 7) | set) & 0xfffff] = 0xff; + lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; } - if (addr & ICACHE_VMEM_BIT) - lookup_table_vmem[(addr >> 5) & 0xfffff] = t; - else if (addr & ICACHE_EXRAM_BIT) - lookup_table_ex[(addr >> 5) & 0x1fffff] = t; + // load + memory.CopyFromEmu(reinterpret_cast(data[set][way].data()), (addr & ~0x1f), 32); + + if (addr & CACHE_VMEM_BIT) + lookup_table_vmem[(addr >> 5) & 0xfffff] = way; + else if (addr & CACHE_EXRAM_BIT) + lookup_table_ex[(addr >> 5) & 0x1fffff] = way; else - lookup_table[(addr >> 5) & 0xfffff] = t; - tags[set][t] = tag; - valid[set] |= (1 << t); + lookup_table[(addr >> 5) & 0xfffff] = way; + tags[set][way] = tag; + addrs[set][way] = addr; + valid[set] |= (1 << way); + wrote[set] &= ~(1 << way); } + // update plru - plru[set] = (plru[set] & ~s_plru_mask[t]) | s_plru_value[t]; - const u32 res = Common::swap32(data[set][t][(addr >> 2) & 7]); - const u32 inmem = memory.Read_U32(addr); - if (res != inmem) - { - INFO_LOG_FMT(POWERPC, - "ICache read at {:08x} returned stale data: CACHED: {:08x} vs. RAM: {:08x}", addr, - res, inmem); - DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::ICACHE_MATTERS); - } - return res; + if (way != 0xff) + plru[set] = (plru[set] & ~s_plru_mask[way]) | s_plru_value[way]; + + return {set, way}; } -void InstructionCache::DoState(PointerWrap& p) +void Cache::Read(u32 addr, void* buffer, u32 len, bool locked) +{ + auto& system = Core::System::GetInstance(); + auto& memory = system.GetMemory(); + + auto* value = static_cast(buffer); + + while (len > 0) + { + auto [set, way] = GetCache(addr, locked); + + u32 offset_in_block = addr - (addr & ~31); + u32 len_in_block = std::min(len, ((addr + 32) & ~31) - addr); + + if (way != 0xff) + { + std::memcpy(value, reinterpret_cast(data[set][way].data()) + offset_in_block, + len_in_block); + } + else + { + memory.CopyFromEmu(value, addr, len_in_block); + } + + addr += len_in_block; + len -= len_in_block; + value += len_in_block; + } +} + +void Cache::Write(u32 addr, const void* buffer, u32 len, bool locked) +{ + auto& system = Core::System::GetInstance(); + auto& memory = system.GetMemory(); + + auto* value = static_cast(buffer); + + while (len > 0) + { + auto [set, way] = GetCache(addr, locked); + + u32 offset_in_block = addr - (addr & ~31); + u32 len_in_block = std::min(len, ((addr + 32) & ~31) - addr); + + if (way != 0xff) + { + std::memcpy(reinterpret_cast(data[set][way].data()) + offset_in_block, value, + len_in_block); + wrote[set] |= (1 << way); + } + else + { + memory.CopyToEmu(addr, value, len_in_block); + } + + addr += len_in_block; + len -= len_in_block; + value += len_in_block; + } +} + +void Cache::DoState(PointerWrap& p) { if (p.IsReadMode()) { // Clear valid parts of the lookup tables (this is done instead of using fill(0xff) to avoid // loading the entire 4MB of tables into cache) - for (u32 set = 0; set < ICACHE_SETS; set++) + for (u32 set = 0; set < CACHE_SETS; set++) { - for (u32 way = 0; way < ICACHE_WAYS; way++) + for (u32 way = 0; way < CACHE_WAYS; way++) { if ((valid[set] & (1 << way)) != 0) { const u32 addr = (tags[set][way] << 12) | (set << 5); - if (addr & ICACHE_VMEM_BIT) + if (addr & CACHE_VMEM_BIT) lookup_table_vmem[(addr >> 5) & 0xfffff] = 0xff; - else if (addr & ICACHE_EXRAM_BIT) + else if (addr & CACHE_EXRAM_BIT) lookup_table_ex[(addr >> 5) & 0x1fffff] = 0xff; else lookup_table[(addr >> 5) & 0xfffff] = 0xff; @@ -234,20 +370,22 @@ void InstructionCache::DoState(PointerWrap& p) p.DoArray(tags); p.DoArray(plru); p.DoArray(valid); + p.DoArray(addrs); + p.DoArray(wrote); if (p.IsReadMode()) { // Recompute lookup tables - for (u32 set = 0; set < ICACHE_SETS; set++) + for (u32 set = 0; set < CACHE_SETS; set++) { - for (u32 way = 0; way < ICACHE_WAYS; way++) + for (u32 way = 0; way < CACHE_WAYS; way++) { if ((valid[set] & (1 << way)) != 0) { const u32 addr = (tags[set][way] << 12) | (set << 5); - if (addr & ICACHE_VMEM_BIT) + if (addr & CACHE_VMEM_BIT) lookup_table_vmem[(addr >> 5) & 0xfffff] = way; - else if (addr & ICACHE_EXRAM_BIT) + else if (addr & CACHE_EXRAM_BIT) lookup_table_ex[(addr >> 5) & 0x1fffff] = way; else lookup_table[(addr >> 5) & 0xfffff] = way; @@ -257,6 +395,29 @@ void InstructionCache::DoState(PointerWrap& p) } } +u32 InstructionCache::ReadInstruction(u32 addr) +{ + auto& system = Core::System::GetInstance(); + auto& memory = system.GetMemory(); + + if (!HID0.ICE || m_disable_icache) // instruction cache is disabled + return memory.Read_U32(addr); + + u32 value; + Read(addr, &value, sizeof(value), HID0.ILOCK); + return Common::swap32(value); +} + +void InstructionCache::Invalidate(u32 addr) +{ + if (!HID0.ICE || m_disable_icache) + return; + + Cache::Invalidate(addr); + + JitInterface::InvalidateICacheLine(addr); +} + void InstructionCache::RefreshConfig() { m_disable_icache = Config::Get(Config::MAIN_DISABLE_ICACHE); diff --git a/Source/Core/Core/PowerPC/PPCCache.h b/Source/Core/Core/PowerPC/PPCCache.h index 4b9906ea42..8aa6c4c811 100644 --- a/Source/Core/Core/PowerPC/PPCCache.h +++ b/Source/Core/Core/PowerPC/PPCCache.h @@ -12,20 +12,22 @@ class PointerWrap; namespace PowerPC { -constexpr u32 ICACHE_SETS = 128; -constexpr u32 ICACHE_WAYS = 8; +constexpr u32 CACHE_SETS = 128; +constexpr u32 CACHE_WAYS = 8; // size of an instruction cache block in words -constexpr u32 ICACHE_BLOCK_SIZE = 8; +constexpr u32 CACHE_BLOCK_SIZE = 8; -constexpr u32 ICACHE_EXRAM_BIT = 0x10000000; -constexpr u32 ICACHE_VMEM_BIT = 0x20000000; +constexpr u32 CACHE_EXRAM_BIT = 0x10000000; +constexpr u32 CACHE_VMEM_BIT = 0x20000000; -struct InstructionCache +struct Cache { - std::array, ICACHE_WAYS>, ICACHE_SETS> data{}; - std::array, ICACHE_SETS> tags{}; - std::array plru{}; - std::array valid{}; + std::array, CACHE_WAYS>, CACHE_SETS> data{}; + std::array, CACHE_SETS> tags{}; + std::array plru{}; + std::array valid{}; + std::array, CACHE_SETS> addrs{}; + std::array wrote{}; // Note: This is only for performance purposes; this same data could be computed at runtime // from the tags and valid fields (and that's how it's done on the actual cache) @@ -33,16 +35,36 @@ struct InstructionCache std::array lookup_table_ex{}; std::array lookup_table_vmem{}; - bool m_disable_icache = false; + void Store(u32 addr); + void Invalidate(u32 addr); + void Flush(u32 addr); + void Touch(u32 addr, bool store); + + void FlushAll(); + + std::pair GetCache(u32 addr, bool locked); + + void Read(u32 addr, void* buffer, u32 len, bool locked); + void Write(u32 addr, const void* buffer, u32 len, bool locked); + + void Init(); + void Reset(); + + void DoState(PointerWrap& p); +}; + +struct InstructionCache : public Cache +{ std::optional m_config_callback_id = std::nullopt; + bool m_disable_icache = false; + InstructionCache() = default; ~InstructionCache(); u32 ReadInstruction(u32 addr); void Invalidate(u32 addr); void Init(); void Reset(); - void DoState(PointerWrap& p); void RefreshConfig(); }; } // namespace PowerPC diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index eafa547e92..f05ed37065 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -132,9 +132,16 @@ void DoState(PointerWrap& p) p.Do(ppcState.reserve_address); ppcState.iCache.DoState(p); + ppcState.dCache.DoState(p); if (p.IsReadMode()) { + if (!ppcState.m_enable_dcache) + { + INFO_LOG_FMT(POWERPC, "Flushing data cache"); + ppcState.dCache.FlushAll(); + } + RoundingModeUpdated(); IBATUpdated(); DBATUpdated(); @@ -266,6 +273,16 @@ void Init(CPUCore cpu_core) InitializeCPUCore(cpu_core); ppcState.iCache.Init(); + ppcState.dCache.Init(); + + if (Config::Get(Config::MAIN_ACCURATE_CPU_CACHE)) + { + ppcState.m_enable_dcache = true; + } + else + { + ppcState.m_enable_dcache = false; + } if (Config::Get(Config::MAIN_ENABLE_DEBUGGING)) breakpoints.ClearAllTemporary(); @@ -279,6 +296,7 @@ void Reset() ResetRegisters(); ppcState.iCache.Reset(); + ppcState.dCache.Reset(); } void ScheduleInvalidateCacheThreadSafe(u32 address) diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index df60432fce..4d70ab7439 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -172,6 +172,8 @@ struct PowerPCState u32 pagetable_hashmask = 0; InstructionCache iCache; + bool m_enable_dcache = false; + Cache dCache; // Reservation monitor for lwarx and its friend stwcxd. bool reserve; diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index f5e494e6d1..d0cb1d0d3b 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -95,7 +95,7 @@ static size_t s_state_writes_in_queue; static std::condition_variable s_state_write_queue_is_empty; // Don't forget to increase this after doing changes on the savestate system -constexpr u32 STATE_VERSION = 156; // Last changed in PR 11184 +constexpr u32 STATE_VERSION = 157; // Last changed in PR 11183 // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, @@ -223,14 +223,18 @@ static void DoState(PointerWrap& p) g_video_backend->DoState(p); p.DoMarker("video_backend"); - PowerPC::DoState(p); - p.DoMarker("PowerPC"); // CoreTiming needs to be restored before restoring Hardware because // the controller code might need to schedule an event if the controller has changed. system.GetCoreTiming().DoState(p); p.DoMarker("CoreTiming"); + + // HW needs to be restored before PowerPC because the data cache might need to be flushed. HW::DoState(p); p.DoMarker("HW"); + + PowerPC::DoState(p); + p.DoMarker("PowerPC"); + if (SConfig::GetInstance().bWii) Wiimote::DoState(p); p.DoMarker("Wiimote"); diff --git a/Source/Core/DolphinQt/Settings/AdvancedPane.cpp b/Source/Core/DolphinQt/Settings/AdvancedPane.cpp index f08f9d1570..62e234219d 100644 --- a/Source/Core/DolphinQt/Settings/AdvancedPane.cpp +++ b/Source/Core/DolphinQt/Settings/AdvancedPane.cpp @@ -74,6 +74,12 @@ void AdvancedPane::CreateLayout() "affect performance.\nThe performance impact is the same as having Enable MMU on.")); cpu_options_group_layout->addWidget(m_pause_on_panic_checkbox); + m_accurate_cpu_cache_checkbox = new QCheckBox(tr("Enable Write-Back Cache (slow)")); + m_accurate_cpu_cache_checkbox->setToolTip( + tr("Enables emulation of the CPU write-back cache.\nEnabling will have a significant impact " + "on performance.\nThis should be left disabled unless absolutely needed.")); + cpu_options_group_layout->addWidget(m_accurate_cpu_cache_checkbox); + auto* clock_override = new QGroupBox(tr("Clock Override")); auto* clock_override_layout = new QVBoxLayout(); clock_override->setLayout(clock_override_layout); @@ -189,6 +195,9 @@ void AdvancedPane::ConnectLayout() connect(m_pause_on_panic_checkbox, &QCheckBox::toggled, this, [](bool checked) { Config::SetBaseOrCurrent(Config::MAIN_PAUSE_ON_PANIC, checked); }); + connect(m_accurate_cpu_cache_checkbox, &QCheckBox::toggled, this, + [](bool checked) { Config::SetBaseOrCurrent(Config::MAIN_ACCURATE_CPU_CACHE, checked); }); + m_cpu_clock_override_checkbox->setChecked(Config::Get(Config::MAIN_OVERCLOCK_ENABLE)); connect(m_cpu_clock_override_checkbox, &QCheckBox::toggled, [this](bool enable_clock_override) { Config::SetBaseOrCurrent(Config::MAIN_OVERCLOCK_ENABLE, enable_clock_override); @@ -258,6 +267,9 @@ void AdvancedPane::Update() m_pause_on_panic_checkbox->setChecked(Config::Get(Config::MAIN_PAUSE_ON_PANIC)); m_pause_on_panic_checkbox->setEnabled(!running); + m_accurate_cpu_cache_checkbox->setChecked(Config::Get(Config::MAIN_ACCURATE_CPU_CACHE)); + m_accurate_cpu_cache_checkbox->setEnabled(!running); + QFont bf = font(); bf.setBold(Config::GetActiveLayerForConfig(Config::MAIN_OVERCLOCK_ENABLE) != Config::LayerType::Base); diff --git a/Source/Core/DolphinQt/Settings/AdvancedPane.h b/Source/Core/DolphinQt/Settings/AdvancedPane.h index c74aeacf09..b4fdb141cd 100644 --- a/Source/Core/DolphinQt/Settings/AdvancedPane.h +++ b/Source/Core/DolphinQt/Settings/AdvancedPane.h @@ -33,6 +33,7 @@ private: QComboBox* m_cpu_emulation_engine_combobox; QCheckBox* m_enable_mmu_checkbox; QCheckBox* m_pause_on_panic_checkbox; + QCheckBox* m_accurate_cpu_cache_checkbox; QCheckBox* m_cpu_clock_override_checkbox; QSlider* m_cpu_clock_override_slider; QLabel* m_cpu_clock_override_slider_label; From 9d39647f9e3be1bdc2ee2cfc5ce6af2e560b4cc1 Mon Sep 17 00:00:00 2001 From: TheLordScruffy Date: Mon, 2 Jan 2023 02:33:57 -0500 Subject: [PATCH 2/5] Fix PPC cache code formatting --- Source/Core/Core/Boot/Boot_BS2Emu.cpp | 6 - .../Interpreter/Interpreter_LoadStore.cpp | 11 +- .../Interpreter_SystemRegisters.cpp | 5 +- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 2 +- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 2 +- Source/Core/Core/PowerPC/MMU.cpp | 10 +- Source/Core/Core/PowerPC/MMU.h | 10 +- Source/Core/Core/PowerPC/PPCCache.cpp | 104 ++++++++++-------- Source/Core/Core/PowerPC/PPCCache.h | 5 +- Source/Core/Core/PowerPC/PowerPC.cpp | 13 +-- 10 files changed, 83 insertions(+), 85 deletions(-) diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index f907b85647..5be2905096 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -190,12 +190,6 @@ bool CBoot::RunApploader(bool is_wii, const DiscIO::VolumeDisc& volume, INFO_LOG_FMT(BOOT, "DVDRead: offset: {:08x} memOffset: {:08x} length: {}", dvd_offset, ram_address, length); DVDRead(volume, dvd_offset, ram_address, length, partition); - for (u32 i = 0; i < length; i += 32) - { - if (PowerPC::ppcState.m_enable_dcache) - PowerPC::ppcState.dCache.Invalidate(ram_address + i); - PowerPC::ppcState.iCache.Invalidate(ram_address + i); - } DiscIO::Riivolution::ApplyApploaderMemoryPatches(riivolution_patches, ram_address, length); diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 401e98e8f1..1c1a341c85 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -448,7 +448,7 @@ void Interpreter::dcbf(UGeckoInstruction inst) return; } - PowerPC::FlushCacheLine(address); + PowerPC::FlushDCacheLine(address); } void Interpreter::dcbi(UGeckoInstruction inst) @@ -469,7 +469,7 @@ void Interpreter::dcbi(UGeckoInstruction inst) return; } - PowerPC::InvalidateCacheLine(address); + PowerPC::InvalidateDCacheLine(address); } void Interpreter::dcbst(UGeckoInstruction inst) @@ -484,7 +484,7 @@ void Interpreter::dcbst(UGeckoInstruction inst) return; } - PowerPC::StoreCacheLine(address); + PowerPC::StoreDCacheLine(address); } // These instructions hint that it might be optimal to prefetch the specified cache line into the @@ -520,7 +520,7 @@ void Interpreter::dcbz(UGeckoInstruction inst) } } - PowerPC::ClearCacheLine(dcbz_addr & (~31)); + PowerPC::ClearDCacheLine(dcbz_addr & (~31)); } void Interpreter::dcbz_l(UGeckoInstruction inst) @@ -539,7 +539,7 @@ void Interpreter::dcbz_l(UGeckoInstruction inst) return; } - PowerPC::ClearCacheLine(address & (~31)); + PowerPC::ClearDCacheLine(address & (~31)); } // eciwx/ecowx technically should access the specified device @@ -594,7 +594,6 @@ void Interpreter::icbi(UGeckoInstruction inst) { // TODO: Raise DSI if translation fails (except for direct-store segments). const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst); - JitInterface::InvalidateICacheLine(address); PowerPC::ppcState.iCache.Invalidate(address); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 6e01e1ecae..09a96aafeb 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -272,8 +272,9 @@ void Interpreter::mfspr(UGeckoInstruction inst) break; case SPR_IABR: - // A strange quirk: reading back this register on hardware will always have this bit set to 0 - // (despite the bit appearing to function normally when set). This does not apply to the DABR. + // A strange quirk: reading back this register on hardware will always have the TE (Translation + // enabled) bit set to 0 (despite the bit appearing to function normally when set). This does + // not apply to the DABR. rGPR[inst.RD] = rSPR(index) & ~1; return; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 969eb1bf16..fe95271136 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -446,7 +446,7 @@ void Jit64::dcbz(UGeckoInstruction inst) MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); - ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH); + ABI_CallFunctionR(PowerPC::ClearDCacheLine, RSCRATCH); ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (emit_fast_path) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 386c0fb69b..e32d166be2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -231,7 +231,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, } else if (flags & BackPatchInfo::FLAG_ZERO_256) { - MOVP2R(ARM64Reg::X8, &PowerPC::ClearCacheLine); + MOVP2R(ARM64Reg::X8, &PowerPC::ClearDCacheLine); BLR(ARM64Reg::X8); } else diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 1ea8397f96..1ccc81d959 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1137,7 +1137,7 @@ void DMA_MemoryToLC(const u32 cache_address, const u32 mem_address, const u32 nu memcpy(dst, src, 32 * num_blocks); } -void ClearCacheLine(u32 address) +void ClearDCacheLine(u32 address) { DEBUG_ASSERT((address & 0x1F) == 0); if (MSR.DR) @@ -1168,7 +1168,7 @@ void ClearCacheLine(u32 address) WriteToHardware(memory, address + i, 0, 4); } -void StoreCacheLine(u32 address) +void StoreDCacheLine(u32 address) { address &= ~0x1F; @@ -1192,7 +1192,7 @@ void StoreCacheLine(u32 address) ppcState.dCache.Store(address); } -void InvalidateCacheLine(u32 address) +void InvalidateDCacheLine(u32 address) { address &= ~0x1F; @@ -1214,7 +1214,7 @@ void InvalidateCacheLine(u32 address) ppcState.dCache.Invalidate(address); } -void FlushCacheLine(u32 address) +void FlushDCacheLine(u32 address) { address &= ~0x1F; @@ -1238,7 +1238,7 @@ void FlushCacheLine(u32 address) ppcState.dCache.Flush(address); } -void TouchCacheLine(u32 address, bool store) +void TouchDCacheLine(u32 address, bool store) { address &= ~0x1F; diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index bfb855478d..44b9785611 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -165,11 +165,11 @@ void Write_F64(double var, u32 address); void DMA_LCToMemory(u32 mem_address, u32 cache_address, u32 num_blocks); void DMA_MemoryToLC(u32 cache_address, u32 mem_address, u32 num_blocks); -void ClearCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned -void StoreCacheLine(u32 address); -void InvalidateCacheLine(u32 address); -void FlushCacheLine(u32 address); -void TouchCacheLine(u32 address, bool store); +void ClearDCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned +void StoreDCacheLine(u32 address); +void InvalidateDCacheLine(u32 address); +void FlushDCacheLine(u32 address); +void TouchDCacheLine(u32 address, bool store); // TLB functions void SDRUpdated(); diff --git a/Source/Core/Core/PowerPC/PPCCache.cpp b/Source/Core/Core/PowerPC/PPCCache.cpp index acdcfaf0c5..b13793522e 100644 --- a/Source/Core/Core/PowerPC/PPCCache.cpp +++ b/Source/Core/Core/PowerPC/PPCCache.cpp @@ -98,7 +98,7 @@ void Cache::Reset() { valid.fill(0); plru.fill(0); - wrote.fill(0); + modified.fill(0); lookup_table.fill(0xFF); lookup_table_ex.fill(0xFF); lookup_table_vmem.fill(0xFF); @@ -113,7 +113,6 @@ void InstructionCache::Reset() void Cache::Init() { data.fill({}); - tags.fill({}); addrs.fill({}); Reset(); } @@ -137,9 +136,9 @@ void Cache::Store(u32 addr) if (way == 0xff) return; - if (valid[set] & (1U << way) && wrote[set] & (1U << way)) + if (valid[set] & (1U << way) && modified[set] & (1U << way)) memory.CopyToEmu((addr & ~0x1f), reinterpret_cast(data[set][way].data()), 32); - wrote[set] &= ~(1U << way); + modified[set] &= ~(1U << way); } void Cache::FlushAll() @@ -151,7 +150,7 @@ void Cache::FlushAll() { for (size_t way = 0; way < CACHE_WAYS; way++) { - if (valid[set] & (1U << way) && wrote[set] & (1U << way)) + if (valid[set] & (1U << way) && modified[set] & (1U << way)) memory.CopyToEmu(addrs[set][way], reinterpret_cast(data[set][way].data()), 32); } } @@ -168,15 +167,15 @@ void Cache::Invalidate(u32 addr) if (valid[set] & (1U << way)) { - if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; valid[set] &= ~(1U << way); - wrote[set] &= ~(1U << way); + modified[set] &= ~(1U << way); } } @@ -192,18 +191,18 @@ void Cache::Flush(u32 addr) if (valid[set] & (1U << way)) { - if (wrote[set] & (1U << way)) + if (modified[set] & (1U << way)) memory.CopyToEmu((addr & ~0x1f), reinterpret_cast(data[set][way].data()), 32); - if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; valid[set] &= ~(1U << way); - wrote[set] &= ~(1U << way); + modified[set] &= ~(1U << way); } } @@ -237,8 +236,6 @@ std::pair Cache::GetCache(u32 addr, bool locked) // load to the cache if (!locked && way == 0xff) { - u32 tag = addr >> 12; - // select a way if (valid[set] != 0xff) way = s_way_from_valid[valid[set]]; @@ -248,15 +245,15 @@ std::pair Cache::GetCache(u32 addr, bool locked) if (valid[set] & (1 << way)) { // store the cache back to main memory - if (wrote[set] & (1 << way)) + if (modified[set] & (1 << way)) memory.CopyToEmu(addrs[set][way], reinterpret_cast(data[set][way].data()), 32); - if (tags[set][way] & (CACHE_VMEM_BIT >> 12)) - lookup_table_vmem[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; - else if (tags[set][way] & (CACHE_EXRAM_BIT >> 12)) - lookup_table_ex[((tags[set][way] << 7) | set) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[((tags[set][way] << 7) | set) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; } // load @@ -268,15 +265,14 @@ std::pair Cache::GetCache(u32 addr, bool locked) lookup_table_ex[(addr >> 5) & 0x1fffff] = way; else lookup_table[(addr >> 5) & 0xfffff] = way; - tags[set][way] = tag; addrs[set][way] = addr; valid[set] |= (1 << way); - wrote[set] &= ~(1 << way); - } + modified[set] &= ~(1 << way); - // update plru - if (way != 0xff) - plru[set] = (plru[set] & ~s_plru_mask[way]) | s_plru_value[way]; + // update plru + if (way != 0xff) + plru[set] = (plru[set] & ~s_plru_mask[way]) | s_plru_value[way]; + } return {set, way}; } @@ -329,7 +325,7 @@ void Cache::Write(u32 addr, const void* buffer, u32 len, bool locked) { std::memcpy(reinterpret_cast(data[set][way].data()) + offset_in_block, value, len_in_block); - wrote[set] |= (1 << way); + modified[set] |= (1 << way); } else { @@ -354,24 +350,22 @@ void Cache::DoState(PointerWrap& p) { if ((valid[set] & (1 << way)) != 0) { - const u32 addr = (tags[set][way] << 12) | (set << 5); - if (addr & CACHE_VMEM_BIT) - lookup_table_vmem[(addr >> 5) & 0xfffff] = 0xff; - else if (addr & CACHE_EXRAM_BIT) - lookup_table_ex[(addr >> 5) & 0x1fffff] = 0xff; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[(addr >> 5) & 0xfffff] = 0xff; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; } } } } p.DoArray(data); - p.DoArray(tags); p.DoArray(plru); p.DoArray(valid); p.DoArray(addrs); - p.DoArray(wrote); + p.DoArray(modified); if (p.IsReadMode()) { @@ -382,13 +376,12 @@ void Cache::DoState(PointerWrap& p) { if ((valid[set] & (1 << way)) != 0) { - const u32 addr = (tags[set][way] << 12) | (set << 5); - if (addr & CACHE_VMEM_BIT) - lookup_table_vmem[(addr >> 5) & 0xfffff] = way; - else if (addr & CACHE_EXRAM_BIT) - lookup_table_ex[(addr >> 5) & 0x1fffff] = way; + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; else - lookup_table[(addr >> 5) & 0xfffff] = way; + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; } } } @@ -413,7 +406,22 @@ void InstructionCache::Invalidate(u32 addr) if (!HID0.ICE || m_disable_icache) return; - Cache::Invalidate(addr); + // Invalidates the whole set + const u32 set = (addr >> 5) & 0x7f; + for (size_t way = 0; way < 8; way++) + { + if (valid[set] & (1U << way)) + { + if (addrs[set][way] & CACHE_VMEM_BIT) + lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + else if (addrs[set][way] & CACHE_EXRAM_BIT) + lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + else + lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + } + } + valid[set] = 0; + modified[set] = 0; JitInterface::InvalidateICacheLine(addr); } diff --git a/Source/Core/Core/PowerPC/PPCCache.h b/Source/Core/Core/PowerPC/PPCCache.h index 8aa6c4c811..6e0d1dc33e 100644 --- a/Source/Core/Core/PowerPC/PPCCache.h +++ b/Source/Core/Core/PowerPC/PPCCache.h @@ -23,11 +23,10 @@ constexpr u32 CACHE_VMEM_BIT = 0x20000000; struct Cache { std::array, CACHE_WAYS>, CACHE_SETS> data{}; - std::array, CACHE_SETS> tags{}; + std::array, CACHE_SETS> addrs{}; std::array plru{}; std::array valid{}; - std::array, CACHE_SETS> addrs{}; - std::array wrote{}; + std::array modified{}; // Note: This is only for performance purposes; this same data could be computed at runtime // from the tags and valid fields (and that's how it's done on the actual cache) diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index f05ed37065..411705d833 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -141,6 +141,10 @@ void DoState(PointerWrap& p) INFO_LOG_FMT(POWERPC, "Flushing data cache"); ppcState.dCache.FlushAll(); } + else + { + ppcState.dCache.Reset(); + } RoundingModeUpdated(); IBATUpdated(); @@ -275,14 +279,7 @@ void Init(CPUCore cpu_core) ppcState.iCache.Init(); ppcState.dCache.Init(); - if (Config::Get(Config::MAIN_ACCURATE_CPU_CACHE)) - { - ppcState.m_enable_dcache = true; - } - else - { - ppcState.m_enable_dcache = false; - } + ppcState.m_enable_dcache = Config::Get(Config::MAIN_ACCURATE_CPU_CACHE); if (Config::Get(Config::MAIN_ENABLE_DEBUGGING)) breakpoints.ClearAllTemporary(); From 811d94222217fadecdd54d5b7fc16b584d52f11a Mon Sep 17 00:00:00 2001 From: TheLordScruffy Date: Sat, 7 Jan 2023 07:30:42 -0500 Subject: [PATCH 3/5] Improve PPCCache lookup table --- Source/Core/Core/PowerPC/PPCCache.cpp | 37 ++++++++++++++------------- Source/Core/Core/PowerPC/PPCCache.h | 6 +++++ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Source/Core/Core/PowerPC/PPCCache.cpp b/Source/Core/Core/PowerPC/PPCCache.cpp index b13793522e..d56d77927c 100644 --- a/Source/Core/Core/PowerPC/PPCCache.cpp +++ b/Source/Core/Core/PowerPC/PPCCache.cpp @@ -168,11 +168,11 @@ void Cache::Invalidate(u32 addr) if (valid[set] & (1U << way)) { if (addrs[set][way] & CACHE_VMEM_BIT) - lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table_vmem[(addrs[set][way] >> 5) & 0xfffff] = 0xff; else if (addrs[set][way] & CACHE_EXRAM_BIT) - lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + lookup_table_ex[(addrs[set][way] >> 5) & 0x1fffff] = 0xff; else - lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table[(addrs[set][way] >> 5) & 0xfffff] = 0xff; valid[set] &= ~(1U << way); modified[set] &= ~(1U << way); @@ -195,11 +195,11 @@ void Cache::Flush(u32 addr) memory.CopyToEmu((addr & ~0x1f), reinterpret_cast(data[set][way].data()), 32); if (addrs[set][way] & CACHE_VMEM_BIT) - lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table_vmem[(addrs[set][way] >> 5) & 0xfffff] = 0xff; else if (addrs[set][way] & CACHE_EXRAM_BIT) - lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + lookup_table_ex[(addrs[set][way] >> 5) & 0x1fffff] = 0xff; else - lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table[(addrs[set][way] >> 5) & 0xfffff] = 0xff; valid[set] &= ~(1U << way); modified[set] &= ~(1U << way); @@ -249,11 +249,11 @@ std::pair Cache::GetCache(u32 addr, bool locked) memory.CopyToEmu(addrs[set][way], reinterpret_cast(data[set][way].data()), 32); if (addrs[set][way] & CACHE_VMEM_BIT) - lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table_vmem[(addrs[set][way] >> 5) & 0xfffff] = 0xff; else if (addrs[set][way] & CACHE_EXRAM_BIT) - lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + lookup_table_ex[(addrs[set][way] >> 5) & 0x1fffff] = 0xff; else - lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table[(addrs[set][way] >> 5) & 0xfffff] = 0xff; } // load @@ -265,6 +265,7 @@ std::pair Cache::GetCache(u32 addr, bool locked) lookup_table_ex[(addr >> 5) & 0x1fffff] = way; else lookup_table[(addr >> 5) & 0xfffff] = way; + addrs[set][way] = addr; valid[set] |= (1 << way); modified[set] &= ~(1 << way); @@ -351,11 +352,11 @@ void Cache::DoState(PointerWrap& p) if ((valid[set] & (1 << way)) != 0) { if (addrs[set][way] & CACHE_VMEM_BIT) - lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table_vmem[(addrs[set][way] >> 5) & 0xfffff] = 0xff; else if (addrs[set][way] & CACHE_EXRAM_BIT) - lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + lookup_table_ex[(addrs[set][way] >> 5) & 0x1fffff] = 0xff; else - lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table[(addrs[set][way] >> 5) & 0xfffff] = 0xff; } } } @@ -377,11 +378,11 @@ void Cache::DoState(PointerWrap& p) if ((valid[set] & (1 << way)) != 0) { if (addrs[set][way] & CACHE_VMEM_BIT) - lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table_vmem[(addrs[set][way] >> 5) & 0xfffff] = 0xff; else if (addrs[set][way] & CACHE_EXRAM_BIT) - lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + lookup_table_ex[(addrs[set][way] >> 5) & 0x1fffff] = 0xff; else - lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table[(addrs[set][way] >> 5) & 0xfffff] = 0xff; } } } @@ -413,11 +414,11 @@ void InstructionCache::Invalidate(u32 addr) if (valid[set] & (1U << way)) { if (addrs[set][way] & CACHE_VMEM_BIT) - lookup_table_vmem[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table_vmem[(addrs[set][way] >> 5) & 0xfffff] = 0xff; else if (addrs[set][way] & CACHE_EXRAM_BIT) - lookup_table_ex[((addrs[set][way] >> 5) & 0x1fff80) | set] = 0xff; + lookup_table_ex[(addrs[set][way] >> 5) & 0x1fffff] = 0xff; else - lookup_table[((addrs[set][way] >> 5) & 0xfff80) | set] = 0xff; + lookup_table[(addrs[set][way] >> 5) & 0xfffff] = 0xff; } } valid[set] = 0; diff --git a/Source/Core/Core/PowerPC/PPCCache.h b/Source/Core/Core/PowerPC/PPCCache.h index 6e0d1dc33e..b39cc3d8e6 100644 --- a/Source/Core/Core/PowerPC/PPCCache.h +++ b/Source/Core/Core/PowerPC/PPCCache.h @@ -23,7 +23,13 @@ constexpr u32 CACHE_VMEM_BIT = 0x20000000; struct Cache { std::array, CACHE_WAYS>, CACHE_SETS> data{}; + + // Stores the 32-byte aligned address of the start of each cache block. This consists of the cache + // set and tag. Real hardware only needs to store the tag, but also including the set simplifies + // debugging and getting the actual address in the cache, without changing behavior (as the set + // portion of the address is by definition the same for all addresses in a set). std::array, CACHE_SETS> addrs{}; + std::array plru{}; std::array valid{}; std::array modified{}; From 825bacde43b1471b84ce60a169ca803a9bfaa0d1 Mon Sep 17 00:00:00 2001 From: TheLordScruffy Date: Sat, 7 Jan 2023 20:18:59 -0500 Subject: [PATCH 4/5] PPCCache: Change u32 to u8 for plru, valid, modified --- Source/Core/Core/PowerPC/PPCCache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/PPCCache.h b/Source/Core/Core/PowerPC/PPCCache.h index b39cc3d8e6..49843c9b74 100644 --- a/Source/Core/Core/PowerPC/PPCCache.h +++ b/Source/Core/Core/PowerPC/PPCCache.h @@ -30,9 +30,9 @@ struct Cache // portion of the address is by definition the same for all addresses in a set). std::array, CACHE_SETS> addrs{}; - std::array plru{}; - std::array valid{}; - std::array modified{}; + std::array plru{}; + std::array valid{}; + std::array modified{}; // Note: This is only for performance purposes; this same data could be computed at runtime // from the tags and valid fields (and that's how it's done on the actual cache) From d85f6c82983d2d9be7786fadc06c20f2495936c6 Mon Sep 17 00:00:00 2001 From: TheLordScruffy Date: Sun, 8 Jan 2023 00:17:46 -0500 Subject: [PATCH 5/5] DolphinAnalytics: Remove ICACHE_MATTERS --- Source/Core/Core/DolphinAnalytics.cpp | 3 +-- Source/Core/Core/DolphinAnalytics.h | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/DolphinAnalytics.cpp b/Source/Core/Core/DolphinAnalytics.cpp index 5c4c420168..8a8978cfbf 100644 --- a/Source/Core/Core/DolphinAnalytics.cpp +++ b/Source/Core/Core/DolphinAnalytics.cpp @@ -135,8 +135,7 @@ void DolphinAnalytics::ReportGameStart() } // Keep in sync with enum class GameQuirk definition. -constexpr std::array GAME_QUIRKS_NAMES{ - "icache-matters", +constexpr std::array GAME_QUIRKS_NAMES{ "directly-reads-wiimote-input", "uses-DVDLowStopLaser", "uses-DVDLowOffset", diff --git a/Source/Core/Core/DolphinAnalytics.h b/Source/Core/Core/DolphinAnalytics.h index c708ad3a13..98c7d4a973 100644 --- a/Source/Core/Core/DolphinAnalytics.h +++ b/Source/Core/Core/DolphinAnalytics.h @@ -21,12 +21,9 @@ enum class GameQuirk { - // Sometimes code run from ICache is different from its mirror in RAM. - ICACHE_MATTERS = 0, - // The Wii remote hardware makes it possible to bypass normal data reporting and directly // "read" extension or IR data. This would break our current TAS/NetPlay implementation. - DIRECTLY_READS_WIIMOTE_INPUT, + DIRECTLY_READS_WIIMOTE_INPUT = 0, // Several Wii DI commands that are rarely/never used and not implemented by Dolphin USES_DVD_LOW_STOP_LASER,