commit aa430608e7
parent dd53b01f76
Author: RSDuck
Date:   2021-06-29 22:25:43 +02:00

support allocating more registers for aarch64 JIT

also some minor fixes for the x64 JIT

10 changed files with 112 additions and 64 deletions

src/ARMJIT.cpp

@@ -645,6 +645,8 @@ void CompileBlock(ARM* cpu)
     u32 lr;
     bool hasLink = false;
+    bool hasMemoryInstr = false;

     do
     {
         r15 += thumb ? 2 : 4;
@@ -707,6 +709,10 @@ void CompileBlock(ARM* cpu)
         }

         instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr);
+        hasMemoryInstr |= thumb
+            ? (instrs[i].Info.Kind >= ARMInstrInfo::tk_LDR_PCREL && instrs[i].Info.Kind <= ARMInstrInfo::tk_STMIA)
+            : (instrs[i].Info.Kind >= ARMInstrInfo::ak_STR_REG_LSL && instrs[i].Info.Kind <= ARMInstrInfo::ak_STM);
+
         cpu->R[15] = r15;
         cpu->CurInstr = instrs[i].Instr;
         cpu->CodeCycles = instrs[i].CodeCycles;
@@ -915,7 +921,7 @@ void CompileBlock(ARM* cpu)
 #if defined(__APPLE__) && defined(__aarch64__)
         pthread_jit_write_protect_np(false);
 #endif
-        block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i);
+        block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i, hasMemoryInstr);
 #if defined(__APPLE__) && defined(__aarch64__)
         pthread_jit_write_protect_np(true);
 #endif
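Note: the hasMemoryInstr scan works because the load/store kinds form contiguous runs in the ARMInstrInfo enums (tk_LDR_PCREL..tk_STMIA for Thumb, ak_STR_REG_LSL..ak_STM for ARM), so membership reduces to two comparisons per instruction. A minimal sketch of the same idea, with hypothetical enumerators standing in for the real kinds:

    // hypothetical kinds; the real enums keep all memory ops adjacent
    enum InstrKind { k_ADD, k_SUB, k_LDR, k_STR, k_LDM, k_STM, k_B };

    bool BlockTouchesMemory(const InstrKind* kinds, int count)
    {
        bool hasMem = false; // one flag for the whole block
        for (int i = 0; i < count; i++)
            hasMem |= kinds[i] >= k_LDR && kinds[i] <= k_STM;
        return hasMem;
    }

The flag is threaded into CompileBlock so the memory base register is only set up for blocks that actually perform memory accesses.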

src/ARMJIT_A64/ARMJIT_Branch.cpp

@@ -27,7 +27,7 @@ namespace ARMJIT
 {

 template <typename T>
-void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
+void JumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
 {
     cpu->JumpTo(addr, changeCPSR);
 }
@@ -301,7 +301,7 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
         bool cpsrDirty = CPSRDirty;
         SaveCPSR();
         SaveCycles();
-        PushRegs(restoreCPSR);
+        PushRegs(restoreCPSR, true);

         if (switchThumb)
             MOV(W1, addr);
@@ -315,11 +315,11 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
         MOV(X0, RCPU);
         MOVI2R(W2, restoreCPSR);
         if (Num == 0)
-            QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
+            QuickCallFunction(X3, JumpToTrampoline<ARMv5>);
         else
-            QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
+            QuickCallFunction(X3, JumpToTrampoline<ARMv4>);

-        PopRegs(restoreCPSR);
+        PopRegs(restoreCPSR, true);

         LoadCycles();
         LoadCPSR();

         if (CurInstr.Cond() < 0xE)
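Note: emitted code can only call a plain function through a pointer, not a member function like ARM::JumpTo, hence the trampoline; the rename to JumpToTrampoline also matches the x64 backend's naming. A self-contained sketch of the pattern, with simplified stand-ins for the cpu types:

    #include <cstdint>

    struct ARMv4 { void JumpTo(uint32_t, bool) {} };
    struct ARMv5 { void JumpTo(uint32_t, bool) {} };

    // each instantiation is an ordinary function, so the JIT can
    // materialize its address and branch-and-link to it
    template <typename T>
    void JumpToTrampoline(T* cpu, uint32_t addr, bool changeCPSR)
    {
        cpu->JumpTo(addr, changeCPSR);
    }

    int main()
    {
        ARMv5 cpu;
        void (*fn)(ARMv5*, uint32_t, bool) = JumpToTrampoline<ARMv5>;
        fn(&cpu, 0x02000000, false); // what QuickCallFunction ends up doing
    }

The second argument added to PushRegs/PopRegs (saveRegsToBeChanged) is true here, presumably because a jump can affect guest state beyond the instruction's own source and destination register sets.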

src/ARMJIT_A64/ARMJIT_Compiler.cpp

@@ -58,9 +58,14 @@ namespace ARMJIT
 template <>
 const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
-    {W19, W20, W21, W22, W23, W24, W25, W26};
+{
+    W19, W20, W21, W22, W23, W24, W25,
+    W8, W9, W10, W11, W12, W13, W14, W15
+};
 template <>
-const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
+const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15;
+
+const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});

 const int JitMemSize = 16 * 1024 * 1024;
 #ifndef __SWITCH__
@@ -164,44 +169,55 @@ void Compiler::A_Comp_MSR()
             MOV(W2, RCPSR);
             MOV(X0, RCPU);

-            PushRegs(true);
-
-            QuickCallFunction(X3, (void*)&UpdateModeTrampoline);
-
-            PopRegs(true);
+            PushRegs(true, true);
+            QuickCallFunction(X3, UpdateModeTrampoline);
+            PopRegs(true, true);
         }
     }
 }

-void Compiler::PushRegs(bool saveHiRegs)
+void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload)
 {
+    BitSet32 loadedRegs(RegCache.LoadedRegs);
+
     if (saveHiRegs)
     {
-        if (Thumb || CurInstr.Cond() == 0xE)
+        BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+        for (int reg : hiRegsLoaded)
         {
-            BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
-            for (int reg : hiRegsLoaded)
+            if (Thumb || CurInstr.Cond() == 0xE)
                 RegCache.UnloadRegister(reg);
-        }
-        else
-        {
-            BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00);
-            for (int reg : hiRegsDirty)
+            else
                 SaveReg(reg, RegCache.Mapping[reg]);
+            // prevent saving the register twice
+            loadedRegs[reg] = false;
+        }
+    }
+
+    for (int reg : loadedRegs)
+    {
+        if (CallerSavedPushRegs[RegCache.Mapping[reg]]
+            && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs))))
+        {
+            if ((Thumb || CurInstr.Cond() == 0xE) && !((1 << reg) & (CurInstr.Info.DstRegs|CurInstr.Info.SrcRegs)) && allowUnload)
+                RegCache.UnloadRegister(reg);
+            else
+                SaveReg(reg, RegCache.Mapping[reg]);
         }
     }
 }

-void Compiler::PopRegs(bool saveHiRegs)
+void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged)
 {
-    if (saveHiRegs)
+    BitSet32 loadedRegs(RegCache.LoadedRegs);
+    for (int reg : loadedRegs)
     {
-        if (!Thumb && CurInstr.Cond() != 0xE)
+        if ((saveHiRegs && reg >= 8 && reg < 15)
+            || (CallerSavedPushRegs[RegCache.Mapping[reg]]
+                && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs)))))
         {
-            BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
-            for (int reg : hiRegsLoaded)
-                LoadReg(reg, RegCache.Mapping[reg]);
+            LoadReg(reg, RegCache.Mapping[reg]);
         }
     }
 }
@@ -267,6 +283,7 @@ Compiler::Compiler()
 }

 /*
+    W4 - whether the register was written to
     W5 - mode
     W1 - reg num
     W3 - in/out value of reg
@@ -358,7 +375,7 @@ Compiler::Compiler()
     {
         for (int reg = 0; reg < 32; reg++)
         {
-            if (!(reg == W4 || (reg >= W19 && reg <= W26)))
+            if (!(reg == W4 || (reg >= W8 && reg <= W15) || (reg >= W19 && reg <= W25)))
                 continue;

             ARM64Reg rdMapped = (ARM64Reg)reg;
             PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr();
@@ -371,7 +388,7 @@ Compiler::Compiler()
             {
                 MOV(W1, rdMapped);
             }
-            ABI_PushRegisters({30});
+            ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
             if (consoleType == 0)
             {
                 switch ((8 << size) | num)
@@ -397,7 +414,7 @@ Compiler::Compiler()
                 }
             }
-            ABI_PopRegisters({30});
+            ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
             RET();

             for (int signextend = 0; signextend < 2; signextend++)
@@ -405,7 +422,7 @@ Compiler::Compiler()
                 PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr();
                 if (num == 0)
                     MOV(X1, RCPU);
-                ABI_PushRegisters({30});
+                ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
                 if (consoleType == 0)
                 {
                     switch ((8 << size) | num)
@@ -430,7 +447,7 @@ Compiler::Compiler()
                         case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
                     }
                 }
-                ABI_PopRegisters({30});
+                ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
                 if (size == 32)
                     MOV(rdMapped, W0);
                 else if (signextend)
@@ -673,7 +690,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken)
     }
 }

-JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr)
 {
     if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
     {
@@ -695,6 +712,9 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
     RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
     CPSRDirty = false;

+    if (hasMemInstr)
+        MOVP2R(RMemBase, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
+
     for (int i = 0; i < instrsCount; i++)
     {
         CurInstr = instrs[i];
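Note: with guest registers now also held in caller-saved host registers (W8-W15), every call out of JIT code must decide, per register, between doing nothing, unloading, and spilling. The dense condition in PushRegs boils down to the following standalone function (names are illustrative, not the real API):

    enum class Action { None, Unload, Save };

    // unconditional == (Thumb || CurInstr.Cond() == 0xE): the instruction
    // always executes, so the cache state after it is statically known
    Action DecidePush(bool callerSaved, bool unconditional, bool isDst,
                      bool isSrc, bool saveRegsToBeChanged, bool allowUnload)
    {
        if (!callerSaved)
            return Action::None;   // callee-saved host regs survive the call
        if (!saveRegsToBeChanged && isDst && !isSrc)
            return Action::None;   // pure destination: about to be overwritten anyway
        if (unconditional && !isDst && !isSrc && allowUnload)
            return Action::Unload; // dead for this instruction: write back and free
        return Action::Save;       // write back but keep the mapping
    }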

src/ARMJIT_A64/ARMJIT_Compiler.h

@@ -32,6 +32,7 @@
 namespace ARMJIT
 {

+const Arm64Gen::ARM64Reg RMemBase = Arm64Gen::X26;
 const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
 const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
 const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
@@ -99,8 +100,8 @@ public:
     Compiler();
     ~Compiler();

-    void PushRegs(bool saveHiRegs);
-    void PopRegs(bool saveHiRegs);
+    void PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload = true);
+    void PopRegs(bool saveHiRegs, bool saveRegsToBeChanged);

     Arm64Gen::ARM64Reg MapReg(int reg)
     {
@@ -108,7 +109,7 @@ public:
         return RegCache.Mapping[reg];
     }

-    JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+    JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr);

     bool CanCompile(bool thumb, u16 kind);
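Note: X26 is removed from the allocation pool and pinned as RMemBase, the base of the fastmem mapping, which is why W26 disappeared from NativeRegAllocOrder. The trade is one fewer callee-saved register for guest state against dropping the MOVP2R previously emitted for every memory access (materializing a 64-bit pointer can take up to four MOVZ/MOVK instructions). A sketch of the addressing this enables, assuming a contiguous host mapping of the guest address space:

    #include <cstdint>
    #include <cstring>

    // with the base pinned in a register, a guest load becomes a single
    // base+offset access: LDR Wd, [RMemBase, Xaddr]
    uint32_t FastmemLoad32(const uint8_t* memBase, uint32_t guestAddr)
    {
        uint32_t value;
        std::memcpy(&value, memBase + guestAddr, sizeof(value));
        return value;
    }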

src/ARMJIT_A64/ARMJIT_LoadStore.cpp

@@ -194,13 +194,11 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
         ptrdiff_t memopStart = GetCodeOffset();
         LoadStorePatch patch;

-        assert((rdMapped >= W19 && rdMapped <= W26) || rdMapped == W4);
+        assert((rdMapped >= W8 && rdMapped <= W15) || (rdMapped >= W19 && rdMapped <= W25) || rdMapped == W4);
         patch.PatchFunc = flags & memop_Store
             ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped]
             : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped];

-        MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
-
         // take a chance at fastmem
         if (size > 8)
             ANDI2R(W1, W0, addressMask);
@@ -208,11 +206,11 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
         ptrdiff_t loadStorePosition = GetCodeOffset();
         if (flags & memop_Store)
         {
-            STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7);
+            STRGeneric(size, rdMapped, size > 8 ? X1 : X0, RMemBase);
         }
         else
         {
-            LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
+            LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, RMemBase);
             if (size == 32 && !addrIsStatic)
             {
                 UBFIZ(W0, W0, 3, 2);
@@ -230,12 +228,16 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
         if (addrIsStatic)
             func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);

+        PushRegs(false, false);
+
         if (func)
         {
             if (flags & memop_Store)
                 MOV(W1, rdMapped);
             QuickCallFunction(X2, (void (*)())func);

+            PopRegs(false, false);
+
             if (!(flags & memop_Store))
             {
                 if (size == 32)
@@ -314,6 +316,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
             }
         }

+        PopRegs(false, false);
+
         if (!(flags & memop_Store))
         {
             if (size == 32)
@@ -515,8 +519,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         ptrdiff_t fastPathStart = GetCodeOffset();
         ptrdiff_t loadStoreOffsets[8];

-        MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
-        ADD(X1, X1, X0);
+        ADD(X1, RMemBase, X0);

         u32 offset = 0;
         BitSet16::Iterator it = regs.begin();
@@ -655,6 +658,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         }
     }

+    PushRegs(false, false, !compileFastPath);
+
     ADD(X1, SP, 0);
     MOVI2R(W2, regsCount);
@@ -680,6 +685,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         }
     }

+    PopRegs(false, false);
+
     if (!store)
     {
         if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
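Note on the fast path above: for 32-bit loads the address is first masked to alignment (the ANDI2R, with addressMask presumably clearing the low bits), and UBFIZ(W0, W0, 3, 2) computes (addr & 3) * 8, the rotate amount for the ARM7/ARM9's rotated unaligned-load behaviour. A sketch of the semantics being reproduced, assuming base points at the fastmem mapping:

    #include <cstdint>
    #include <cstring>

    // classic ARM unaligned LDR: fetch from the aligned address, then
    // rotate right by 8 bits per low address bit
    uint32_t RotatedLoad32(const uint8_t* base, uint32_t addr)
    {
        uint32_t word;
        std::memcpy(&word, base + (addr & ~3u), sizeof(word));
        uint32_t rot = (addr & 3) * 8;
        return rot ? (word >> rot) | (word << (32 - rot)) : word;
    }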

src/ARMJIT_x64/ARMJIT_Branch.cpp

@@ -165,7 +165,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
     bool cpsrDirty = CPSRDirty;
     SaveCPSR();

-    PushRegs(restoreCPSR);
+    PushRegs(restoreCPSR, true);

     MOV(64, R(ABI_PARAM1), R(RCPU));
     MOV(32, R(ABI_PARAM2), R(addr));
@@ -178,7 +178,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
     else
         CALL((void*)&ARMv4JumpToTrampoline);

-    PopRegs(restoreCPSR);
+    PopRegs(restoreCPSR, true);

     LoadCPSR();
     // in case this instruction is skipped

src/ARMJIT_x64/ARMJIT_Compiler.cpp

@@ -64,7 +64,7 @@ const BitSet32 CallerSavedPushRegs({R10, R11});
 const BitSet32 CallerSavedPushRegs({R9, R10, R11});
 #endif

-void Compiler::PushRegs(bool saveHiRegs)
+void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload)
 {
     BitSet32 loadedRegs(RegCache.LoadedRegs);
@@ -83,17 +83,26 @@ void Compiler::PushRegs(bool saveHiRegs)
     }

     for (int reg : loadedRegs)
-        if (BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
-            SaveReg(reg, RegCache.Mapping[reg]);
+    {
+        if (CallerSavedPushRegs[RegCache.Mapping[reg]]
+            && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs))))
+        {
+            if ((Thumb || CurInstr.Cond() == 0xE) && !((1 << reg) & (CurInstr.Info.DstRegs|CurInstr.Info.SrcRegs)) && allowUnload)
+                RegCache.UnloadRegister(reg);
+            else
+                SaveReg(reg, RegCache.Mapping[reg]);
+        }
+    }
 }

-void Compiler::PopRegs(bool saveHiRegs)
+void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged)
 {
     BitSet32 loadedRegs(RegCache.LoadedRegs);
     for (int reg : loadedRegs)
     {
         if ((saveHiRegs && reg >= 8 && reg < 15)
-            || BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
+            || (CallerSavedPushRegs[RegCache.Mapping[reg]]
+                && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs)))))
         {
             LoadReg(reg, RegCache.Mapping[reg]);
         }
@@ -205,14 +214,14 @@ void Compiler::A_Comp_MSR()
         AND(32, R(RSCRATCH2), val);
         OR(32, R(RCPSR), R(RSCRATCH2));

-        PushRegs(true);
+        PushRegs(true, true);

         MOV(32, R(ABI_PARAM3), R(RCPSR));
         MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
         MOV(64, R(ABI_PARAM1), R(RCPU));
         CALL((void*)&UpdateModeTrampoline);

-        PopRegs(true);
+        PopRegs(true, true);
     }
 }
@@ -659,7 +668,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
     }
 }

-JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemoryInstr)
 {
     if (NearSize - (GetCodePtr() - NearStart) < 1024 * 32) // guess...
     {
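Note: both backends now express the spill set as CallerSavedPushRegs membership plus liveness, instead of x64's earlier blanket ABI_ALL_CALLER_SAVED save. The loops walk the set bits of a BitSet32; a minimal sketch of that iteration pattern, simplified from the real BitSet type:

    #include <cstdint>

    struct BitSet32
    {
        uint32_t val;
        struct Iterator
        {
            uint32_t rest;
            int operator*() const { return __builtin_ctz(rest); }
            Iterator& operator++() { rest &= rest - 1; return *this; }
            bool operator!=(const Iterator& o) const { return rest != o.rest; }
        };
        Iterator begin() const { return {val}; }
        Iterator end() const { return {0}; }
        bool operator[](int i) const { return (val >> i) & 1; }
    };

    // e.g. how many loaded guest regs live in volatile host regs
    int CountRegsToSave(BitSet32 loaded, BitSet32 callerSaved)
    {
        int n = 0;
        for (int reg : loaded)
            if (callerSaved[reg])
                n++;
        return n;
    }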

src/ARMJIT_x64/ARMJIT_Compiler.h

@@ -79,7 +79,7 @@ public:
     void Reset();

-    JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+    JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemoryInstr);

     void LoadReg(int reg, Gen::X64Reg nativeReg);
     void SaveReg(int reg, Gen::X64Reg nativeReg);
@@ -192,8 +192,8 @@ public:
     Gen::FixupBranch CheckCondition(u32 cond);

-    void PushRegs(bool saveHiRegs);
-    void PopRegs(bool saveHiRegs);
+    void PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload = true);
+    void PopRegs(bool saveHiRegs, bool saveRegsToBeChanged);

     Gen::OpArg MapReg(int reg)
     {

src/ARMJIT_x64/ARMJIT_LoadStore.cpp

@@ -266,7 +266,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
     }
     else
     {
-        PushRegs(false);
+        PushRegs(false, false);

         void* func = NULL;
         if (addrIsStatic)
@@ -283,7 +283,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag

             ABI_CallFunction((void (*)())func);

-            PopRegs(false);
+            PopRegs(false, false);

             if (!(flags & memop_Store))
             {
@@ -370,7 +370,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
             }
         }

-        PopRegs(false);
+        PopRegs(false, false);

         if (!(flags & memop_Store))
         {
@@ -508,7 +508,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc

     if (!store)
     {
-        PushRegs(false);
+        PushRegs(false, false, !compileFastPath);

         MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
         MOV(32, R(ABI_PARAM3), Imm32(regsCount));
@@ -529,7 +529,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
             case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
         }

-        PopRegs(false);
+        PopRegs(false, false);

         if (allocOffset)
             ADD(64, R(RSP), Imm8(allocOffset));
@@ -606,7 +606,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         if (allocOffset)
             SUB(64, R(RSP), Imm8(allocOffset));

-        PushRegs(false);
+        PushRegs(false, false, !compileFastPath);

         MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
         if (allocOffset)
@@ -628,7 +628,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
             ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));

-        PopRegs(false);
+        PopRegs(false, false);
     }

     if (compileFastPath)
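Note: PushRegs(false, false, !compileFastPath) ties unloading to the fast path. When a fastmem fast path was emitted, this slow path is reached by patching, so it must leave the register mapping exactly as the surrounding code was compiled to expect; unloading is only safe when the slow path is the sole code path. The shape of the sequence, with an illustrative emitter interface rather than the real one:

    struct Emitter
    {
        void PushRegs(bool, bool, bool /*allowUnload*/) { /* emit spills */ }
        void PopRegs(bool, bool) { /* emit reloads */ }
        void CallMemoryHandler() { /* emit the call into C++ */ }
    };

    void EmitSlowPath(Emitter& e, bool compileFastPath)
    {
        // allowUnload = !compileFastPath: a patched-in slow path must
        // preserve the mapping the fast path was compiled against
        e.PushRegs(false, false, !compileFastPath);
        e.CallMemoryHandler();
        e.PopRegs(false, false);
    }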

src/ARM_InstrInfo.cpp

@@ -526,7 +526,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
         if (data & A_LoadMem)
         {
             if (res.SrcRegs == (1 << 15))
                 res.SpecialKind = special_LoadLiteral;
             else
                 res.SpecialKind = special_LoadMem;
         }
@@ -536,6 +536,11 @@ Info Decode(bool thumb, u32 num, u32 instr)
         u16 set = (instr & 0xFFFF);
         res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
         res.DstRegs |= set;
+        // when the instruction is executed not in usermode a banked register in memory will be written to
+        // but the unbanked register will still be allocated, so it is expected to carry the proper value
+        // thus it is a source register
+        if (instr & (1<<22))
+            res.SrcRegs |= set & 0x7F00;
     }
     if (res.Kind == ak_STM)
     {
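Note: the added SrcRegs bits model LDM with the S bit (bit 22) set. Executed outside user mode, the listed hi registers are loaded into the user bank, while the JIT keeps the current-mode copies allocated; marking them as sources keeps those cached values live instead of treating them as overwritten. Condensed sketch of the decode step, with Info as a stand-in for the real struct:

    #include <cstdint>

    struct Info { uint32_t SrcRegs = 0, DstRegs = 0; };

    void DecodeLDM(uint32_t instr, Info& res)
    {
        uint16_t set = instr & 0xFFFF;   // transfer list, one bit per register
        res.DstRegs |= set;
        if (instr & (1 << 22))           // S bit: user-bank transfer (LDM^)
            res.SrcRegs |= set & 0x7F00; // keep R8-R14 live as sources
    }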