mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2025-07-29 00:59:56 -06:00
support allocating more registers for aarch64 JIT
also some minor fixes for the x64 JIT as well
This commit is contained in:
@ -27,7 +27,7 @@ namespace ARMJIT
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
|
||||
void JumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
|
||||
{
|
||||
cpu->JumpTo(addr, changeCPSR);
|
||||
}
|
||||
@ -301,7 +301,7 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
|
||||
bool cpsrDirty = CPSRDirty;
|
||||
SaveCPSR();
|
||||
SaveCycles();
|
||||
PushRegs(restoreCPSR);
|
||||
PushRegs(restoreCPSR, true);
|
||||
|
||||
if (switchThumb)
|
||||
MOV(W1, addr);
|
||||
@ -315,11 +315,11 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
|
||||
MOV(X0, RCPU);
|
||||
MOVI2R(W2, restoreCPSR);
|
||||
if (Num == 0)
|
||||
QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
|
||||
QuickCallFunction(X3, JumpToTrampoline<ARMv5>);
|
||||
else
|
||||
QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
|
||||
QuickCallFunction(X3, JumpToTrampoline<ARMv4>);
|
||||
|
||||
PopRegs(restoreCPSR);
|
||||
PopRegs(restoreCPSR, true);
|
||||
LoadCycles();
|
||||
LoadCPSR();
|
||||
if (CurInstr.Cond() < 0xE)
|
||||
|
@ -58,9 +58,14 @@ namespace ARMJIT
|
||||
|
||||
template <>
|
||||
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
|
||||
{W19, W20, W21, W22, W23, W24, W25, W26};
|
||||
{
|
||||
W19, W20, W21, W22, W23, W24, W25,
|
||||
W8, W9, W10, W11, W12, W13, W14, W15
|
||||
};
|
||||
template <>
|
||||
const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
|
||||
const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15;
|
||||
|
||||
const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});
|
||||
|
||||
const int JitMemSize = 16 * 1024 * 1024;
|
||||
#ifndef __SWITCH__
|
||||
@ -164,44 +169,55 @@ void Compiler::A_Comp_MSR()
|
||||
MOV(W2, RCPSR);
|
||||
MOV(X0, RCPU);
|
||||
|
||||
PushRegs(true);
|
||||
|
||||
QuickCallFunction(X3, (void*)&UpdateModeTrampoline);
|
||||
|
||||
PopRegs(true);
|
||||
PushRegs(true, true);
|
||||
QuickCallFunction(X3, UpdateModeTrampoline);
|
||||
PopRegs(true, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::PushRegs(bool saveHiRegs)
|
||||
|
||||
void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload)
|
||||
{
|
||||
BitSet32 loadedRegs(RegCache.LoadedRegs);
|
||||
|
||||
if (saveHiRegs)
|
||||
{
|
||||
if (Thumb || CurInstr.Cond() == 0xE)
|
||||
BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
|
||||
for (int reg : hiRegsLoaded)
|
||||
{
|
||||
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
|
||||
for (int reg : hiRegsLoaded)
|
||||
if (Thumb || CurInstr.Cond() == 0xE)
|
||||
RegCache.UnloadRegister(reg);
|
||||
else
|
||||
SaveReg(reg, RegCache.Mapping[reg]);
|
||||
// prevent saving the register twice
|
||||
loadedRegs[reg] = false;
|
||||
}
|
||||
else
|
||||
}
|
||||
|
||||
for (int reg : loadedRegs)
|
||||
{
|
||||
if (CallerSavedPushRegs[RegCache.Mapping[reg]]
|
||||
&& (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs))))
|
||||
{
|
||||
BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00);
|
||||
for (int reg : hiRegsDirty)
|
||||
if ((Thumb || CurInstr.Cond() == 0xE) && !((1 << reg) & (CurInstr.Info.DstRegs|CurInstr.Info.SrcRegs)) && allowUnload)
|
||||
RegCache.UnloadRegister(reg);
|
||||
else
|
||||
SaveReg(reg, RegCache.Mapping[reg]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::PopRegs(bool saveHiRegs)
|
||||
void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged)
|
||||
{
|
||||
if (saveHiRegs)
|
||||
BitSet32 loadedRegs(RegCache.LoadedRegs);
|
||||
for (int reg : loadedRegs)
|
||||
{
|
||||
if (!Thumb && CurInstr.Cond() != 0xE)
|
||||
if ((saveHiRegs && reg >= 8 && reg < 15)
|
||||
|| (CallerSavedPushRegs[RegCache.Mapping[reg]]
|
||||
&& (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs)))))
|
||||
{
|
||||
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
|
||||
|
||||
for (int reg : hiRegsLoaded)
|
||||
LoadReg(reg, RegCache.Mapping[reg]);
|
||||
LoadReg(reg, RegCache.Mapping[reg]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -267,6 +283,7 @@ Compiler::Compiler()
|
||||
}
|
||||
|
||||
/*
|
||||
W4 - whether the register was written to
|
||||
W5 - mode
|
||||
W1 - reg num
|
||||
W3 - in/out value of reg
|
||||
@ -358,7 +375,7 @@ Compiler::Compiler()
|
||||
{
|
||||
for (int reg = 0; reg < 32; reg++)
|
||||
{
|
||||
if (!(reg == W4 || (reg >= W19 && reg <= W26)))
|
||||
if (!(reg == W4 || (reg >= W8 && reg <= W15) || (reg >= W19 && reg <= W25)))
|
||||
continue;
|
||||
ARM64Reg rdMapped = (ARM64Reg)reg;
|
||||
PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr();
|
||||
@ -371,7 +388,7 @@ Compiler::Compiler()
|
||||
{
|
||||
MOV(W1, rdMapped);
|
||||
}
|
||||
ABI_PushRegisters({30});
|
||||
ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
|
||||
if (consoleType == 0)
|
||||
{
|
||||
switch ((8 << size) | num)
|
||||
@ -397,7 +414,7 @@ Compiler::Compiler()
|
||||
}
|
||||
}
|
||||
|
||||
ABI_PopRegisters({30});
|
||||
ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
|
||||
RET();
|
||||
|
||||
for (int signextend = 0; signextend < 2; signextend++)
|
||||
@ -405,7 +422,7 @@ Compiler::Compiler()
|
||||
PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr();
|
||||
if (num == 0)
|
||||
MOV(X1, RCPU);
|
||||
ABI_PushRegisters({30});
|
||||
ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs);
|
||||
if (consoleType == 0)
|
||||
{
|
||||
switch ((8 << size) | num)
|
||||
@ -430,7 +447,7 @@ Compiler::Compiler()
|
||||
case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
|
||||
}
|
||||
}
|
||||
ABI_PopRegisters({30});
|
||||
ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs);
|
||||
if (size == 32)
|
||||
MOV(rdMapped, W0);
|
||||
else if (signextend)
|
||||
@ -673,7 +690,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken)
|
||||
}
|
||||
}
|
||||
|
||||
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
|
||||
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr)
|
||||
{
|
||||
if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
|
||||
{
|
||||
@ -695,6 +712,9 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||
RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
|
||||
CPSRDirty = false;
|
||||
|
||||
if (hasMemInstr)
|
||||
MOVP2R(RMemBase, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
|
||||
|
||||
for (int i = 0; i < instrsCount; i++)
|
||||
{
|
||||
CurInstr = instrs[i];
|
||||
|
@ -32,6 +32,7 @@
|
||||
namespace ARMJIT
|
||||
{
|
||||
|
||||
const Arm64Gen::ARM64Reg RMemBase = Arm64Gen::X26;
|
||||
const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
|
||||
const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
|
||||
const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
|
||||
@ -99,8 +100,8 @@ public:
|
||||
Compiler();
|
||||
~Compiler();
|
||||
|
||||
void PushRegs(bool saveHiRegs);
|
||||
void PopRegs(bool saveHiRegs);
|
||||
void PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload = true);
|
||||
void PopRegs(bool saveHiRegs, bool saveRegsToBeChanged);
|
||||
|
||||
Arm64Gen::ARM64Reg MapReg(int reg)
|
||||
{
|
||||
@ -108,7 +109,7 @@ public:
|
||||
return RegCache.Mapping[reg];
|
||||
}
|
||||
|
||||
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
|
||||
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr);
|
||||
|
||||
bool CanCompile(bool thumb, u16 kind);
|
||||
|
||||
|
@ -194,13 +194,11 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
|
||||
ptrdiff_t memopStart = GetCodeOffset();
|
||||
LoadStorePatch patch;
|
||||
|
||||
assert((rdMapped >= W19 && rdMapped <= W26) || rdMapped == W4);
|
||||
assert((rdMapped >= W8 && rdMapped <= W15) || (rdMapped >= W19 && rdMapped <= W25) || rdMapped == W4);
|
||||
patch.PatchFunc = flags & memop_Store
|
||||
? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped]
|
||||
: PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped];
|
||||
|
||||
MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
|
||||
|
||||
// take a chance at fastmem
|
||||
if (size > 8)
|
||||
ANDI2R(W1, W0, addressMask);
|
||||
@ -208,11 +206,11 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
|
||||
ptrdiff_t loadStorePosition = GetCodeOffset();
|
||||
if (flags & memop_Store)
|
||||
{
|
||||
STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7);
|
||||
STRGeneric(size, rdMapped, size > 8 ? X1 : X0, RMemBase);
|
||||
}
|
||||
else
|
||||
{
|
||||
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
|
||||
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, RMemBase);
|
||||
if (size == 32 && !addrIsStatic)
|
||||
{
|
||||
UBFIZ(W0, W0, 3, 2);
|
||||
@ -230,12 +228,16 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
|
||||
if (addrIsStatic)
|
||||
func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
|
||||
|
||||
PushRegs(false, false);
|
||||
|
||||
if (func)
|
||||
{
|
||||
if (flags & memop_Store)
|
||||
MOV(W1, rdMapped);
|
||||
QuickCallFunction(X2, (void (*)())func);
|
||||
|
||||
PopRegs(false, false);
|
||||
|
||||
if (!(flags & memop_Store))
|
||||
{
|
||||
if (size == 32)
|
||||
@ -314,6 +316,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
|
||||
}
|
||||
}
|
||||
|
||||
PopRegs(false, false);
|
||||
|
||||
if (!(flags & memop_Store))
|
||||
{
|
||||
if (size == 32)
|
||||
@ -515,8 +519,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||
ptrdiff_t fastPathStart = GetCodeOffset();
|
||||
ptrdiff_t loadStoreOffsets[8];
|
||||
|
||||
MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
|
||||
ADD(X1, X1, X0);
|
||||
ADD(X1, RMemBase, X0);
|
||||
|
||||
u32 offset = 0;
|
||||
BitSet16::Iterator it = regs.begin();
|
||||
@ -655,6 +658,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||
}
|
||||
}
|
||||
|
||||
PushRegs(false, false, !compileFastPath);
|
||||
|
||||
ADD(X1, SP, 0);
|
||||
MOVI2R(W2, regsCount);
|
||||
|
||||
@ -680,6 +685,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||
}
|
||||
}
|
||||
|
||||
PopRegs(false, false);
|
||||
|
||||
if (!store)
|
||||
{
|
||||
if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
|
||||
|
Reference in New Issue
Block a user