support allocating more registers for aarch64 JIT

also some minor fixes for the x64 JIT
RSDuck
2021-06-29 22:25:43 +02:00
parent dd53b01f76
commit aa430608e7
10 changed files with 112 additions and 64 deletions
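
The gist of the change: the aarch64 register cache previously handed out only the callee-saved registers W19-W26, and now also draws on the caller-saved temporaries W8-W15 (W26 leaves the allocation order), raising the number of allocatable host registers from 8 to 15. Because W8-W15 are not preserved across calls under the AArch64 procedure call standard, any guest value cached in one of them has to be spilled around calls into C helpers, which is what the reworked PushRegs/PopRegs and the extra CallerSavedPushRegs pushes in the memory trampolines below take care of. A minimal standalone sketch of that rule, with illustrative names rather than project code:

#include <cstdint>

// Bit n set = host register Wn belongs to the caller-saved pool (W8-W15).
constexpr uint32_t kCallerSavedPool = 0x0000FF00;

// A value cached in a caller-saved host register only survives a call into a
// C helper if the JIT spills it itself; values in callee-saved registers are
// preserved by the callee and can stay resident.
bool MustSpillAroundCall(int hostReg, bool valueStillLiveAfterCall)
{
    bool callerSaved = (kCallerSavedPool >> hostReg) & 1;
    return callerSaved && valueStillLiveAfterCall;
}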


@@ -58,9 +58,14 @@ namespace ARMJIT
 template <>
 const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
-    {W19, W20, W21, W22, W23, W24, W25, W26};
+{
+    W19, W20, W21, W22, W23, W24, W25,
+    W8, W9, W10, W11, W12, W13, W14, W15
+};
 template <>
-const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
+const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15;
+
+const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});
 
 const int JitMemSize = 16 * 1024 * 1024;
 
 #ifndef __SWITCH__
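
For context (this is the standard AAPCS64 register split, not something the commit defines): W19-W25 are callee-saved, so values cached there survive calls for free, while W8-W15 are temporaries that the trampolines further down now push and pop explicitly via CallerSavedPushRegs. A small self-contained check of that classification:

#include <cstdio>

// AAPCS64: X19-X28 are callee-saved; X8-X15 are caller-saved temporaries
// (X8 doubles as the indirect result register but is still not preserved).
bool IsCalleeSaved(int w) { return w >= 19 && w <= 28; }

int main()
{
    // The widened allocation order mixes both classes.
    const int order[] = {19, 20, 21, 22, 23, 24, 25, 8, 9, 10, 11, 12, 13, 14, 15};
    for (int w : order)
        std::printf("W%-2d %s\n", w, IsCalleeSaved(w) ? "callee-saved" : "caller-saved");
    return 0;
}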
@@ -164,44 +169,55 @@ void Compiler::A_Comp_MSR()
             MOV(W2, RCPSR);
             MOV(X0, RCPU);
-            PushRegs(true);
-            QuickCallFunction(X3, (void*)&UpdateModeTrampoline);
-            PopRegs(true);
+            PushRegs(true, true);
+            QuickCallFunction(X3, UpdateModeTrampoline);
+            PopRegs(true, true);
         }
     }
 }
 
-void Compiler::PushRegs(bool saveHiRegs)
+void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload)
 {
+    BitSet32 loadedRegs(RegCache.LoadedRegs);
+
     if (saveHiRegs)
     {
-        if (Thumb || CurInstr.Cond() == 0xE)
+        BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+        for (int reg : hiRegsLoaded)
         {
-            BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
-            for (int reg : hiRegsLoaded)
+            if (Thumb || CurInstr.Cond() == 0xE)
                 RegCache.UnloadRegister(reg);
-        }
-        else
-        {
-            BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00);
-            for (int reg : hiRegsDirty)
-                SaveReg(reg, RegCache.Mapping[reg]);
+            else
+                SaveReg(reg, RegCache.Mapping[reg]);
+            // prevent saving the register twice
+            loadedRegs[reg] = false;
+        }
+    }
+
+    for (int reg : loadedRegs)
+    {
+        if (CallerSavedPushRegs[RegCache.Mapping[reg]]
+            && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs))))
+        {
+            if ((Thumb || CurInstr.Cond() == 0xE) && !((1 << reg) & (CurInstr.Info.DstRegs|CurInstr.Info.SrcRegs)) && allowUnload)
+                RegCache.UnloadRegister(reg);
+            else
+                SaveReg(reg, RegCache.Mapping[reg]);
         }
     }
 }
 
-void Compiler::PopRegs(bool saveHiRegs)
+void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged)
 {
-    if (saveHiRegs)
+    BitSet32 loadedRegs(RegCache.LoadedRegs);
+    for (int reg : loadedRegs)
     {
-        if (!Thumb && CurInstr.Cond() != 0xE)
+        if ((saveHiRegs && reg >= 8 && reg < 15)
+            || (CallerSavedPushRegs[RegCache.Mapping[reg]]
+                && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs)))))
         {
-            BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
-            for (int reg : hiRegsLoaded)
-                LoadReg(reg, RegCache.Mapping[reg]);
+            LoadReg(reg, RegCache.Mapping[reg]);
         }
     }
 }
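
The condition shared by the new PushRegs/PopRegs bodies reads densely, but it boils down to this: a guest register held in a caller-saved host register is saved before and reloaded after the call unless it is a pure destination of the current instruction (written but never read), in which case its pre-call value is dead anyway; saveRegsToBeChanged forces the save regardless. A hedged restatement with standalone names, not the project's API:

#include <cstdint>

// Mirrors the saveRegsToBeChanged condition above, with illustrative names.
bool NeedsSaveAroundCall(int guestReg, uint32_t srcRegs, uint32_t dstRegs,
                         bool saveRegsToBeChanged)
{
    // "Pure destination": the instruction writes the register without reading
    // it, so the value it held before the call does not need to be preserved.
    bool pureDestination = (dstRegs & (1u << guestReg)) && !(srcRegs & (1u << guestReg));
    return saveRegsToBeChanged || !pureDestination;
}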
@@ -267,6 +283,7 @@ Compiler::Compiler()
     }
 
     /*
        W4 - whether the register was written to
+       W5 - mode
        W1 - reg num
        W3 - in/out value of reg
@@ -358,7 +375,7 @@ Compiler::Compiler()
            {
                for (int reg = 0; reg < 32; reg++)
                {
-                    if (!(reg == W4 || (reg >= W19 && reg <= W26)))
+                    if (!(reg == W4 || (reg >= W8 && reg <= W15) || (reg >= W19 && reg <= W25)))
                        continue;
                    ARM64Reg rdMapped = (ARM64Reg)reg;
                    PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr();
@@ -371,7 +388,7 @@ Compiler::Compiler()
                    {
                        MOV(W1, rdMapped);
                    }
-                    ABI_PushRegisters({30});
+                    ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
                    if (consoleType == 0)
                    {
                        switch ((8 << size) | num)
@@ -397,7 +414,7 @@ Compiler::Compiler()
                        }
                    }
 
-                    ABI_PopRegisters({30});
+                    ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
                    RET();
 
                    for (int signextend = 0; signextend < 2; signextend++)
@@ -405,7 +422,7 @@ Compiler::Compiler()
                        PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr();
                        if (num == 0)
                            MOV(X1, RCPU);
-                        ABI_PushRegisters({30});
+                        ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
                        if (consoleType == 0)
                        {
                            switch ((8 << size) | num)
@@ -430,7 +447,7 @@ Compiler::Compiler()
                            case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
                            }
                        }
-                        ABI_PopRegisters({30});
+                        ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
                        if (size == 32)
                            MOV(rdMapped, W0);
                        else if (signextend)
@@ -673,7 +690,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken)
    }
 }
 
-JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr)
 {
    if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
    {
@@ -695,6 +712,9 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
    RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
    CPSRDirty = false;
 
+    if (hasMemInstr)
+        MOVP2R(RMemBase, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
+
    for (int i = 0; i < instrsCount; i++)
    {
        CurInstr = instrs[i];
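
The final hunks thread a new hasMemInstr flag into CompileBlock so the fast-memory base pointer (RMemBase) is only materialised for blocks that actually contain a memory access. A hypothetical sketch of how such a flag could be derived from the decoded instructions; the struct and helper below are made up for illustration and are not the project's types:

// Illustrative only: decide once per block whether the prologue needs to set
// up the fast-memory base pointer at all.
struct DecodedInstr { bool accessesMemory; };

bool BlockHasMemInstr(const DecodedInstr* instrs, int count)
{
    for (int i = 0; i < count; i++)
        if (instrs[i].accessesMemory)
            return true;
    return false; // pure ALU/branch block: skip the base pointer setup
}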