From 2b376a92ae47a144afb17c652f18e6e45bed3007 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 10 May 2025 17:28:38 +0200 Subject: [PATCH] PowerPC: Correctly handle stswi/stswx to uncached memory On real hardware, stswi and stswx don't trigger any of the special behavior for uncached unaligned writes that was implemented in 543ed8a. This is confirmed by a hwtest (a new commit in https://github.com/dolphin-emu/hwtests/pull/42). This change fixes Dolphin's stswi and stswx implementations so they stop triggering the special behavior, bringing them back to the behavior they had before 543ed8a. No games are known to be affected, but Extrems has reported that it affects homebrew they've made. --- .../Core/PowerPC/Interpreter/Interpreter.h | 2 + .../Interpreter/Interpreter_LoadStore.cpp | 100 +++++++++++------- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter.h index d0c87b1c90..af800d65cb 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.h @@ -309,6 +309,8 @@ private: static void Helper_FloatCompareUnordered(PowerPC::PowerPCState& ppc_state, UGeckoInstruction inst, double a, double b); + static void Helper_StoreString(Interpreter& interpreter, const u32 EA, u32 n, u32 r); + void UpdatePC(); bool IsInvalidPairedSingleExecution(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index a01090561e..db342600a0 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -957,9 +957,6 @@ void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst) } } -// todo : optimize ? -// stswi - bizarro string instruction -// FIXME: Should rollback if a DSI occurs void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; @@ -973,38 +970,13 @@ void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst) return; } - u32 n = 32; - if (inst.NB != 0) - n = inst.NB; - - u32 r = u32{inst.RS} - 1; - u32 i = 0; - while (n > 0) - { - if (i == 0) - { - r++; - r &= 31; - } - interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA); - if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0) - { - return; - } - - i += 8; - if (i == 32) - i = 0; - EA++; - n--; - } + Helper_StoreString(interpreter, EA, inst.NB == 0 ? 32 : inst.NB, inst.RS); } -// TODO: is this right? is it DSI interruptible? void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - u32 EA = Helper_Get_EA_X(ppc_state, inst); + const u32 EA = Helper_Get_EA_X(ppc_state, inst); if (ppc_state.msr.LE) { @@ -1012,22 +984,68 @@ void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst) return; } - u32 n = u8(ppc_state.xer_stringctrl); - u32 r = inst.RS; - u32 i = 0; + Helper_StoreString(interpreter, EA, u8(ppc_state.xer_stringctrl), inst.RS); +} - while (n > 0) +void Interpreter::Helper_StoreString(Interpreter& interpreter, const u32 EA, u32 n, u32 r) +{ + // Helper for stswi/stswx, the oddball store string instructions. + // + // If we ask the MMU code to write to uncached memory and the start or end address isn't divisible + // by 4, it results in surrounding bytes getting overwritten. stswi/stswx should never trigger + // this behavior, so we need to be careful to only use 32-bit writes with proper alignment here. + // + // TODO: How should DSI exceptions in the middle of the instruction be handled? + + auto& ppc_state = interpreter.m_ppc_state; + + const u32 misalignment_bytes = EA & 3; + const u32 misalignment_bits = misalignment_bytes * 8; + + u32 current_address = EA & ~3; + u64 current_value = 0; + if (misalignment_bytes != 0) { - interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA); + // Handle misalignment at start + current_value = interpreter.m_mmu.Read_U32(current_address); + if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0) + return; + current_value <<= misalignment_bits; + current_value &= 0xFFFF'FFFF'0000'0000; + n += misalignment_bytes; + } - EA++; - n--; - i += 8; - if (i == 32) + while (n >= 4) + { + current_value |= ppc_state.gpr[r]; + interpreter.m_mmu.Write_U32(static_cast(current_value >> misalignment_bits), + current_address); + if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0) + return; + + current_value <<= 32; + current_address += 4; + n -= 4; + r = (r + 1) & 0x1f; // wrap + } + + if (n != 0) + { + // Handle misalignment at end + if (n > misalignment_bytes) { - i = 0; - r = (r + 1) & 0x1f; // wrap + current_value |= ppc_state.gpr[r]; + current_value <<= (n - misalignment_bytes) * 8; } + else + { + current_value >>= (misalignment_bytes - n) * 8; + } + current_value &= 0xFFFF'FFFF'0000'0000; + current_value |= (interpreter.m_mmu.Read_U32(current_address) << (n * 8)) & 0xFFFF'FFFF; + if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0) + return; + interpreter.m_mmu.Write_U32(static_cast(current_value >> (n * 8)), current_address); } }