PowerPC: Correctly handle stswi/stswx to uncached memory

On real hardware, stswi and stswx don't trigger any of the special
behavior for uncached unaligned writes that was implemented in 543ed8a.
This is confirmed by a hwtest (a new commit in
https://github.com/dolphin-emu/hwtests/pull/42).

This change fixes Dolphin's stswi and stswx implementations so they stop
triggering the special behavior, bringing them back to the behavior they
had before 543ed8a. No games are known to be affected, but Extrems has
reported that it affects homebrew they've made.
This commit is contained in:
JosJuice
2025-05-10 17:28:38 +02:00
parent 63572f15fc
commit 2b376a92ae
2 changed files with 61 additions and 41 deletions

View File

@ -309,6 +309,8 @@ private:
static void Helper_FloatCompareUnordered(PowerPC::PowerPCState& ppc_state, UGeckoInstruction inst, static void Helper_FloatCompareUnordered(PowerPC::PowerPCState& ppc_state, UGeckoInstruction inst,
double a, double b); double a, double b);
static void Helper_StoreString(Interpreter& interpreter, const u32 EA, u32 n, u32 r);
void UpdatePC(); void UpdatePC();
bool IsInvalidPairedSingleExecution(UGeckoInstruction inst); bool IsInvalidPairedSingleExecution(UGeckoInstruction inst);

View File

@ -957,9 +957,6 @@ void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst)
} }
} }
// todo : optimize ?
// stswi - bizarro string instruction
// FIXME: Should rollback if a DSI occurs
void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst)
{ {
auto& ppc_state = interpreter.m_ppc_state; auto& ppc_state = interpreter.m_ppc_state;
@ -973,38 +970,13 @@ void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst)
return; return;
} }
u32 n = 32; Helper_StoreString(interpreter, EA, inst.NB == 0 ? 32 : inst.NB, inst.RS);
if (inst.NB != 0)
n = inst.NB;
u32 r = u32{inst.RS} - 1;
u32 i = 0;
while (n > 0)
{
if (i == 0)
{
r++;
r &= 31;
}
interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA);
if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0)
{
return;
}
i += 8;
if (i == 32)
i = 0;
EA++;
n--;
}
} }
// TODO: is this right? is it DSI interruptible?
void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst)
{ {
auto& ppc_state = interpreter.m_ppc_state; auto& ppc_state = interpreter.m_ppc_state;
u32 EA = Helper_Get_EA_X(ppc_state, inst); const u32 EA = Helper_Get_EA_X(ppc_state, inst);
if (ppc_state.msr.LE) if (ppc_state.msr.LE)
{ {
@ -1012,22 +984,68 @@ void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst)
return; return;
} }
u32 n = u8(ppc_state.xer_stringctrl); Helper_StoreString(interpreter, EA, u8(ppc_state.xer_stringctrl), inst.RS);
u32 r = inst.RS; }
u32 i = 0;
while (n > 0) void Interpreter::Helper_StoreString(Interpreter& interpreter, const u32 EA, u32 n, u32 r)
{ {
interpreter.m_mmu.Write_U8((ppc_state.gpr[r] >> (24 - i)) & 0xFF, EA); // Helper for stswi/stswx, the oddball store string instructions.
//
// If we ask the MMU code to write to uncached memory and the start or end address isn't divisible
// by 4, it results in surrounding bytes getting overwritten. stswi/stswx should never trigger
// this behavior, so we need to be careful to only use 32-bit writes with proper alignment here.
//
// TODO: How should DSI exceptions in the middle of the instruction be handled?
EA++; auto& ppc_state = interpreter.m_ppc_state;
n--;
i += 8; const u32 misalignment_bytes = EA & 3;
if (i == 32) const u32 misalignment_bits = misalignment_bytes * 8;
u32 current_address = EA & ~3;
u64 current_value = 0;
if (misalignment_bytes != 0)
{ {
i = 0; // Handle misalignment at start
current_value = interpreter.m_mmu.Read_U32(current_address);
if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0)
return;
current_value <<= misalignment_bits;
current_value &= 0xFFFF'FFFF'0000'0000;
n += misalignment_bytes;
}
while (n >= 4)
{
current_value |= ppc_state.gpr[r];
interpreter.m_mmu.Write_U32(static_cast<u32>(current_value >> misalignment_bits),
current_address);
if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0)
return;
current_value <<= 32;
current_address += 4;
n -= 4;
r = (r + 1) & 0x1f; // wrap r = (r + 1) & 0x1f; // wrap
} }
if (n != 0)
{
// Handle misalignment at end
if (n > misalignment_bytes)
{
current_value |= ppc_state.gpr[r];
current_value <<= (n - misalignment_bytes) * 8;
}
else
{
current_value >>= (misalignment_bytes - n) * 8;
}
current_value &= 0xFFFF'FFFF'0000'0000;
current_value |= (interpreter.m_mmu.Read_U32(current_address) << (n * 8)) & 0xFFFF'FFFF;
if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0)
return;
interpreter.m_mmu.Write_U32(static_cast<u32>(current_value >> (n * 8)), current_address);
} }
} }