diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h
index 24e730b1..235155f4 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.h
@@ -185,7 +185,7 @@ public:
     void T_Comp_BL_LONG_2();
     void T_Comp_BL_Merged();
 
-    s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
+    s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn);
 
     void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);
 
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
index 5ac629b1..ba1bf00c 100644
--- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -465,7 +465,7 @@ void Compiler::T_Comp_MemSPRel()
     Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store);
 }
 
-s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn)
 {
     IrregularCycles = true;
 
@@ -474,7 +474,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
     if (regsCount == 0)
         return 0; // actually not the right behaviour TODO: fix me
 
-    if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
+    int firstReg = *regs.begin();
+    if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << firstReg) && !(firstReg == rn && skipLoadingRn))
     {
         int flags = 0;
         if (store)
@@ -483,7 +484,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
             flags |= memop_SubtractOffset;
         Op2 offset = preinc ? Op2(4) : Op2(0);
 
-        Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
+        Comp_MemAccess(firstReg, rn, offset, 32, flags);
 
         return decrement ? -4 : 4;
     }
@@ -539,12 +540,16 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
             loadStoreOffsets[i++] = GetCodeOffset();
 
             if (store)
+            {
                 STR(INDEX_UNSIGNED, first, X1, offset);
-            else
+            }
+            else if (!(reg == rn && skipLoadingRn))
+            {
                 LDR(INDEX_UNSIGNED, first, X1, offset);
 
-            if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
-                SaveReg(reg, first);
+                if (!(RegCache.LoadedRegs & (1 << reg)))
+                    SaveReg(reg, first);
+            }
 
             offset += 4;
         }
@@ -558,13 +563,23 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
 
             ARM64Reg first = W3, second = W4;
             if (RegCache.LoadedRegs & (1 << reg))
-                first = MapReg(reg);
+            {
+                if (!(reg == rn && skipLoadingRn))
+                    first = MapReg(reg);
+            }
             else if (store)
+            {
                 LoadReg(reg, first);
+            }
             if (RegCache.LoadedRegs & (1 << nextReg))
-                second = MapReg(nextReg);
+            {
+                if (!(nextReg == rn && skipLoadingRn))
+                    second = MapReg(nextReg);
+            }
             else if (store)
+            {
                 LoadReg(nextReg, second);
+            }
 
             loadStoreOffsets[i++] = GetCodeOffset();
             if (store)
@@ -705,20 +720,23 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
                 LDR(INDEX_UNSIGNED, W3, SP, i * 8);
                 MOVI2R(W1, reg - 8);
                 BL(WriteBanked);
-                FixupBranch alreadyWritten = CBNZ(W4);
-                if (RegCache.LoadedRegs & (1 << reg))
-                    MOV(MapReg(reg), W3);
-                else
-                    SaveReg(reg, W3);
-                SetJumpTarget(alreadyWritten);
+                if (!(reg == rn && skipLoadingRn))
+                {
+                    FixupBranch alreadyWritten = CBNZ(W4);
+                    if (RegCache.LoadedRegs & (1 << reg))
+                        MOV(MapReg(reg), W3);
+                    else
+                        SaveReg(reg, W3);
+                    SetJumpTarget(alreadyWritten);
+                }
             }
             else if (!usermode && nextReg != regs.end())
             {
                 ARM64Reg first = W3, second = W4;
 
-                if (RegCache.LoadedRegs & (1 << reg))
+                if (RegCache.LoadedRegs & (1 << reg) && !(reg == rn && skipLoadingRn))
                     first = MapReg(reg);
-                if (RegCache.LoadedRegs & (1 << *nextReg))
+                if (RegCache.LoadedRegs & (1 << *nextReg) && !(*nextReg == rn && skipLoadingRn))
                     second = MapReg(*nextReg);
 
                 LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
@@ -733,8 +751,11 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
             }
             else if (RegCache.LoadedRegs & (1 << reg))
             {
-                ARM64Reg mapped = MapReg(reg);
-                LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
+                if (!(reg == rn && skipLoadingRn))
+                {
+                    ARM64Reg mapped = MapReg(reg);
+                    LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
+                }
             }
             else
             {
@@ -778,13 +799,13 @@ void Compiler::A_Comp_LDM_STM()
 
     ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
 
-    s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
-
     if (load && writeback && regs[CurInstr.A_Reg(16)])
         writeback = Num == 0
-            ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
-            : false;
-    if (writeback)
+            && (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1));
+
+    s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode, load && writeback);
+
+    if (writeback && offset)
     {
         if (offset > 0)
             ADD(rn, rn, offset);
@@ -820,10 +841,12 @@ void Compiler::T_Comp_LDMIA_STMIA()
     ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
     bool load = CurInstr.Instr & (1 << 11);
     u32 regsCount = regs.Count();
-
-    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
-    if (!load || !regs[CurInstr.T_Reg(8)])
+    bool writeback = !load || !regs[CurInstr.T_Reg(8)];
+
+    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false, load && writeback);
+
+    if (writeback && offset)
     {
         if (offset > 0)
             ADD(rb, rb, offset);
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index eedfc1ae..98362821 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -163,7 +163,7 @@ public:
         memop_SubtractOffset = 1 << 4
    };
     void Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags);
-    s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
+    s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn);
     bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
 
     void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index 88c0a9b8..776d641e 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -399,14 +399,15 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
     }
 }
 
-s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode, bool skipLoadingRn)
 {
     int regsCount = regs.Count();
 
     if (regsCount == 0)
         return 0; // actually not the right behaviour TODO: fix me
 
-    if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
+    int firstReg = *regs.begin();
+    if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << firstReg) && !(firstReg == rn && skipLoadingRn))
     {
         int flags = 0;
         if (store)
@@ -415,7 +416,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
             flags |= memop_SubtractOffset;
         Op2 offset = preinc ? Op2(4) : Op2(0);
 
-        Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
+        Comp_MemAccess(firstReg, rn, offset, 32, flags);
 
         return decrement ? -4 : 4;
     }
@@ -482,7 +483,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         {
             if (RegCache.LoadedRegs & (1 << reg))
             {
-                MOV(32, MapReg(reg), mem);
+                if (!(reg == rn && skipLoadingRn))
+                    MOV(32, MapReg(reg), mem);
+                else
+                    MOV(32, R(RSCRATCH), mem); // just touch the memory
             }
             else
             {
@@ -548,12 +552,15 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
                 MOV(32, R(RSCRATCH2), Imm32(reg - 8));
                 POP(RSCRATCH3);
                 CALL(WriteBanked);
-                FixupBranch sucessfulWritten = J_CC(CC_NC);
-                if (RegCache.LoadedRegs & (1 << reg))
-                    MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
-                else
-                    SaveReg(reg, RSCRATCH3);
-                SetJumpTarget(sucessfulWritten);
+                if (!(reg == rn && skipLoadingRn))
+                {
+                    FixupBranch sucessfulWritten = J_CC(CC_NC);
+                    if (RegCache.LoadedRegs & (1 << reg))
+                        MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
+                    else
+                        SaveReg(reg, RSCRATCH3);
+                    SetJumpTarget(sucessfulWritten);
+                }
             }
             else if (!(RegCache.LoadedRegs & (1 << reg)))
             {
@@ -562,6 +569,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
                 POP(RSCRATCH);
                 SaveReg(reg, RSCRATCH);
             }
+            else if (reg == rn && skipLoadingRn)
+            {
+                ADD(64, R(RSP), Imm8(8));
+            }
             else
             {
                 POP(MapReg(reg).GetSimpleReg());
@@ -748,14 +759,14 @@ void Compiler::A_Comp_LDM_STM()
 
     OpArg rn = MapReg(CurInstr.A_Reg(16));
 
-    s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
-
     if (load && writeback && regs[CurInstr.A_Reg(16)])
         writeback = Num == 0
-            ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
-            : false;
-    if (writeback)
-        ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset));
+            && (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1));
+
+    s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode, load && writeback);
+
+    if (writeback && offset)
+        ADD(32, rn, Imm32(offset));
 }
 
 void Compiler::T_Comp_MemImm()
@@ -825,9 +836,10 @@ void Compiler::T_Comp_PUSH_POP()
     }
 
     OpArg sp = MapReg(13);
-    s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);
+    s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false, false);
 
-    ADD(32, sp, Imm8(offset)); // offset will be always be in range since PUSH accesses 9 regs max
+    if (offset)
+        ADD(32, sp, Imm8(offset)); // offset will be always be in range since PUSH accesses 9 regs max
 }
 
 void Compiler::T_Comp_LDMIA_STMIA()
@@ -836,9 +848,11 @@ void Compiler::T_Comp_LDMIA_STMIA()
     OpArg rb = MapReg(CurInstr.T_Reg(8));
     bool load = CurInstr.Instr & (1 << 11);
 
-    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
+    bool writeback = !load || !regs[CurInstr.T_Reg(8)];
 
-    if (!load || !regs[CurInstr.T_Reg(8)])
+    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false, load && writeback);
+
+    if (writeback && offset)
         ADD(32, rb, Imm8(offset));
 }
 
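Note on the writeback predicate (not part of the patch itself): both backends now evaluate the base-in-rlist condition before calling Comp_MemAccessBlock and pass load && writeback as the new skipLoadingRn argument, so the block load no longer clobbers a base register that is about to receive the writeback value. The sketch below is only an illustrative restatement of the expression used in A_Comp_LDM_STM; the helper name LDMBaseWritebackKept and the raw uint16_t register mask are assumptions for the example, not identifiers from the codebase.

#include <cstdint>

// Hypothetical restatement of the writeback condition from A_Comp_LDM_STM:
//   writeback = Num == 0
//       && (!(regs & ~BitSet16(1 << rn))) || (regs & ~BitSet16((2 << rn) - 1));
// num   - 0 for the ARM9-side compiler, 1 for the ARM7 side (mirrors Compiler::Num)
// rn    - base register index (CurInstr.A_Reg(16))
// rlist - LDM/STM register list as a plain bitmask (stands in for BitSet16)
static bool LDMBaseWritebackKept(int num, int rn, uint16_t rlist)
{
    bool rnIsOnlyReg = (rlist & ~(1u << rn)) == 0;        // rn is the sole entry in the list
    bool regsAboveRn = (rlist & ~((2u << rn) - 1)) != 0;  // entries numbered higher than rn exist

    // && binds tighter than ||, matching how the expression in the diff parses
    return (num == 0 && rnIsOnlyReg) || regsAboveRn;
}

When this evaluates to true for a load, the call sites pass skipLoadingRn = true, Comp_MemAccessBlock leaves the cached rn untouched (the x64 slow path simply discards the stacked value with ADD(64, R(RSP), Imm8(8)), and the fast path only touches the memory), and the caller then applies the base adjustment, now guarded by if (writeback && offset).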