diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp
index cac590af..27c24c7e 100644
--- a/src/ARMJIT_x64/ARMJIT_Branch.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp
@@ -138,18 +138,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
     bool cpsrDirty = CPSRDirty;
     SaveCPSR();
 
-    if (restoreCPSR)
-    {
-        if (Thumb || CurInstr.Cond() >= 0xE)
-            RegCache.Flush();
-        else
-        {
-            // the ugly way...
-            // we only save them, to load and save them again
-            for (int reg : hiRegsLoaded)
-                SaveReg(reg, RegCache.Mapping[reg]);
-        }
-    }
+    PushRegs(restoreCPSR);
 
     MOV(64, R(ABI_PARAM1), R(RCPU));
     MOV(32, R(ABI_PARAM2), R(addr));
@@ -162,11 +151,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
     else
         CALL((void*)&ARMv4::JumpTo);
 
-    if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
-    {
-        for (int reg : hiRegsLoaded)
-            LoadReg(reg, RegCache.Mapping[reg]);
-    }
+    PopRegs(restoreCPSR);
 
     LoadCPSR();
     // in case this instruction is skipped
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index eee2e0f3..ef04601c 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -26,7 +26,8 @@ const X64Reg RegisterCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
 #ifdef _WIN32
     RBX, RSI, RDI, R12, R13, R14
 #else
-    RBX, R12, R13, R14 // this is sad
+    RBX, R12, R13, R14, // callee saved, this is sad
+    R9, R10, R11, // caller saved
 #endif
 };
 template <>
@@ -34,10 +35,46 @@ const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
 #ifdef _WIN32
     6
 #else
-    4
+    7
 #endif
 ;
 
+void Compiler::PushRegs(bool saveHiRegs)
+{
+    BitSet32 loadedRegs(RegCache.LoadedRegs);
+
+    if (saveHiRegs)
+    {
+        BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+        for (int reg : hiRegsLoaded)
+        {
+            if (Thumb || CurInstr.Cond() == 0xE)
+                RegCache.UnloadRegister(reg);
+            else
+                SaveReg(reg, RegCache.Mapping[reg]);
+            // prevent saving the register twice
+            loadedRegs[reg] = false;
+        }
+    }
+
+    for (int reg : loadedRegs)
+        if (BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
+            SaveReg(reg, RegCache.Mapping[reg]);
+}
+
+void Compiler::PopRegs(bool saveHiRegs)
+{
+    BitSet32 loadedRegs(RegCache.LoadedRegs);
+    for (int reg : loadedRegs)
+    {
+        if ((saveHiRegs && reg >= 8 && reg < 15)
+            || BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
+        {
+            LoadReg(reg, RegCache.Mapping[reg]);
+        }
+    }
+}
+
 void Compiler::A_Comp_MRS()
 {
     Comp_AddCycles_C();
@@ -136,27 +173,14 @@ void Compiler::A_Comp_MSR()
             AND(32, R(RSCRATCH2), val);
             OR(32, R(RCPSR), R(RSCRATCH2));
 
-            BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
-            if (Thumb || CurInstr.Cond() >= 0xE)
-                RegCache.Flush();
-            else
-            {
-                // the ugly way...
-                // we only save them, to load and save them again
-                for (int reg : hiRegsLoaded)
-                    SaveReg(reg, RegCache.Mapping[reg]);
-            }
+            PushRegs(true);
 
             MOV(32, R(ABI_PARAM3), R(RCPSR));
             MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
             MOV(64, R(ABI_PARAM1), R(RCPU));
             CALL((void*)&ARM::UpdateMode);
 
-            if (!Thumb && CurInstr.Cond() < 0xE)
-            {
-                for (int reg : hiRegsLoaded)
-                    LoadReg(reg, RegCache.Mapping[reg]);
-            }
+            PopRegs(true);
         }
     }
 }
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index 9df218b8..f2fc3018 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -168,6 +168,9 @@ public:
 
     Gen::FixupBranch CheckCondition(u32 cond);
 
+    void PushRegs(bool saveHiRegs);
+    void PopRegs(bool saveHiRegs);
+
     Gen::OpArg MapReg(int reg)
     {
         if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG)
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index c13b7796..b27efdd9 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -283,6 +283,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
         }
         else
         {
+            PushRegs(false);
+
             u32 maskedDataRegion;
 
             if (addrIsStatic)
@@ -310,6 +312,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
             MOV(32, R(ABI_PARAM2), rdMapped);
 
             ABI_CallFunction((void(*)())func);
+
+            PopRegs(false);
         }
         else
         {
@@ -318,6 +322,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
 
             ABI_CallFunction((void(*)())func);
 
+            PopRegs(false);
+
             if (!addrIsStatic)
                 MOV(32, R(RSCRATCH3), rdMapped);
 
@@ -352,6 +358,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
 
         if (compileSlowPath)
         {
+            PushRegs(false);
+
             if (Num == 0)
             {
                 MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
@@ -402,6 +410,9 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
                     }
                 }
             }
+
+            PopRegs(false);
+
             if (!(flags & memop_Store))
             {
                 if (flags & memop_SignExtend)
@@ -561,6 +572,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
 
     if (!store)
     {
+        PushRegs(false);
+
         MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
         MOV(32, R(ABI_PARAM3), Imm32(regsCount));
         SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
@@ -580,6 +593,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         case 3: CALL((void*)&SlowBlockTransfer7); break;
         }
 
+        PopRegs(false);
+
         if (allocOffset)
             ADD(64, R(RSP), Imm8(allocOffset));
 
@@ -655,6 +670,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         if (allocOffset)
             SUB(64, R(RSP), Imm8(allocOffset));
 
+        PushRegs(false);
+
         MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
         if (allocOffset)
             LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
@@ -674,6 +691,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         }
 
         ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
+
+        PopRegs(false);
     }
 
     if (compileFastPath)
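
For context on the pattern above: of the GPRs this JIT is willing to allocate, the SysV x86-64 ABI leaves only RBX and R12-R14 callee-saved (hence "this is sad"), while Win64 additionally preserves RSI and RDI. Taking the caller-saved R9-R11 into the register cache is only safe if every guest register mapped to one of them is written back before each CALL into C++ code (the slow-path memory helpers, ARM::UpdateMode, the JumpTo functions) and reloaded afterwards, which is exactly what the PushRegs/PopRegs pair does. The saveHiRegs flag covers the banked r8-r14 around calls that may switch the CPU mode: for Thumb or unconditional ARM instructions the mode change is certain, so those registers are unloaded entirely (their LoadedRegs bits are cleared, and PopRegs reloads nothing); for conditional ARM instructions the change may not happen, so they are saved and then reloaded around the call.

Below is a minimal, self-contained sketch of that invariant under a deliberately simplified model. GuestContext, kCallerSavedMask, HostRegs and Spilled are illustrative stand-ins, not melonDS API; the real code operates on RegCache.LoadedRegs, RegCache.Mapping and ABI_ALL_CALLER_SAVED as in the diff above.

    #include <cstdint>
    #include <cstdio>

    // Bit n set -> host register n is caller-saved. 0x0FC7 covers RAX, RCX,
    // RDX, RSI, RDI and R8-R11 in the usual x64 numbering; treat it as an
    // illustrative placeholder for ABI_ALL_CALLER_SAVED.
    constexpr uint32_t kCallerSavedMask = 0x0FC7;

    struct GuestContext
    {
        uint32_t LoadedRegs = 0;    // bit n set -> guest reg n lives in a host reg
        int Mapping[16] = {};       // guest reg -> host reg index
        uint32_t HostRegs[16] = {}; // stand-in for the host register file
        uint32_t Spilled[16] = {};  // backing store (the in-memory guest state)

        void SaveReg(int reg) { Spilled[reg] = HostRegs[Mapping[reg]]; }
        void LoadReg(int reg) { HostRegs[Mapping[reg]] = Spilled[reg]; }

        // Before a call into C++ code: write back every cached guest register
        // whose host register the callee is free to clobber.
        void PushRegs()
        {
            for (int reg = 0; reg < 16; reg++)
                if ((LoadedRegs & (1u << reg)) && (kCallerSavedMask & (1u << Mapping[reg])))
                    SaveReg(reg);
        }

        // After the call returns: reload exactly the same set.
        void PopRegs()
        {
            for (int reg = 0; reg < 16; reg++)
                if ((LoadedRegs & (1u << reg)) && (kCallerSavedMask & (1u << Mapping[reg])))
                    LoadReg(reg);
        }
    };

    int main()
    {
        GuestContext ctx;
        ctx.LoadedRegs = 0b11; // guest r0 and r1 are cached
        ctx.Mapping[0] = 9;    // r0 -> R9  (caller-saved: survives only via the spill)
        ctx.Mapping[1] = 12;   // r1 -> R12 (callee-saved: survives the call as-is)
        ctx.HostRegs[9] = 0x1234;
        ctx.HostRegs[12] = 0x5678;

        ctx.PushRegs();        // spills r0 only
        ctx.HostRegs[9] = 0;   // the callee clobbers R9
        ctx.PopRegs();         // reloads r0 from its spill slot

        // expected: r0=0x1234 r1=0x5678
        std::printf("r0=%#x r1=%#x\n",
                    (unsigned)ctx.HostRegs[9], (unsigned)ctx.HostRegs[12]);
        return 0;
    }

The sketch should print r0=0x1234 r1=0x5678: the value held in the caller-saved register survives the simulated call only because of the spill/reload bracket, which is the same reason every ABI_CallFunction/CALL site in this patch gains a PushRegs/PopRegs pair.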