first steps in bringing over the JIT refactor/fastmem

RSDuck
2020-06-14 21:04:25 +02:00
parent fea9f95bba
commit e335a8ca76
25 changed files with 2368 additions and 1624 deletions


@@ -2,286 +2,62 @@
#include "../Config.h"
#include "../ARMJIT_Memory.h"
using namespace Arm64Gen;
namespace ARMJIT
{
// W0 - address
// (if store) W1 - value to store
// W2 - code cycles
void* Compiler::Gen_MemoryRoutine9(int size, bool store)
bool Compiler::IsJITFault(u64 pc)
{
AlignCode16();
void* res = GetRXPtr();
u32 addressMask;
switch (size)
{
case 32: addressMask = ~3; break;
case 16: addressMask = ~1; break;
case 8: addressMask = ~0; break;
}
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase));
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize));
SUB(W3, W0, W3);
CMP(W3, W4);
FixupBranch insideDTCM = B(CC_LO);
UBFX(W4, W0, 24, 8);
CMP(W4, 0x02);
FixupBranch outsideMainRAM = B(CC_NEQ);
ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1));
MOVP2R(X4, NDS::MainRAM);
if (!store && size == 32)
{
LDR(W3, X3, X4);
ANDI2R(W0, W0, 3);
LSL(W0, W0, 3);
RORV(W0, W3, W0);
}
else if (store)
STRGeneric(size, W1, X3, X4);
else
LDRGeneric(size, false, W0, X3, X4);
RET();
SetJumpTarget(outsideMainRAM);
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
CMP(W0, W3);
FixupBranch insideITCM = B(CC_LO);
if (store)
{
if (size > 8)
ANDI2R(W0, W0, addressMask);
switch (size)
{
case 32: QuickTailCall(X4, NDS::ARM9Write32); break;
case 16: QuickTailCall(X4, NDS::ARM9Write16); break;
case 8: QuickTailCall(X4, NDS::ARM9Write8); break;
}
}
else
{
if (size == 32)
ABI_PushRegisters({0, 30});
if (size > 8)
ANDI2R(W0, W0, addressMask);
switch (size)
{
case 32: QuickCallFunction(X4, NDS::ARM9Read32); break;
case 16: QuickTailCall (X4, NDS::ARM9Read16); break;
case 8: QuickTailCall (X4, NDS::ARM9Read8 ); break;
}
if (size == 32)
{
ABI_PopRegisters({1, 30});
ANDI2R(W1, W1, 3);
LSL(W1, W1, 3);
RORV(W0, W0, W1);
RET();
}
}
SetJumpTarget(insideDTCM);
ANDI2R(W3, W3, 0x3FFF & addressMask);
ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4);
if (!store && size == 32)
{
ANDI2R(W4, W0, 3);
LDR(W0, RCPU, W3);
LSL(W4, W4, 3);
RORV(W0, W0, W4);
}
else if (store)
STRGeneric(size, W1, RCPU, W3);
else
LDRGeneric(size, false, W0, RCPU, W3);
RET();
SetJumpTarget(insideITCM);
ANDI2R(W3, W0, 0x7FFF & addressMask);
if (store)
{
ADDI2R(W0, W3, ExeMemRegionOffsets[exeMem_ITCM], W4);
LSR(W5, W0, 9);
MOVP2R(X4, CodeRanges);
ADD(X4, X4, X5, ArithOption(X5, ST_LSL, 4));
static_assert(sizeof(AddressRange) == 16);
LDRH(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length));
FixupBranch null = CBZ(W4);
ABI_PushRegisters({1, 3, 30});
QuickCallFunction(X4, InvalidateByAddr);
ABI_PopRegisters({1, 3, 30});
SetJumpTarget(null);
}
ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4);
if (!store && size == 32)
{
ANDI2R(W4, W0, 3);
LDR(W0, RCPU, W3);
LSL(W4, W4, 3);
RORV(W0, W0, W4);
}
else if (store)
STRGeneric(size, W1, RCPU, W3);
else
LDRGeneric(size, false, W0, RCPU, W3);
RET();
return res;
return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
}
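// How the fast path gets repaired at runtime: IsJITFault tells the host
// fault handler whether a SIGSEGV came from JIT-emitted code, and
// RewriteMemAccess (below) patches the offending access. A minimal sketch
// of the handler side, assuming a Linux/AArch64 ucontext layout and a
// reachable Compiler instance ("compiler"); neither is part of this commit:
//
//     static void SigsegvHandler(int sig, siginfo_t* info, void* rawCtx)
//     {
//         ucontext_t* ctx = (ucontext_t*)rawCtx;
//         u64 pc = ctx->uc_mcontext.pc; // faulting instruction
//         if (compiler->IsJITFault(pc))
//         {
//             // rewrite the fastmem access into a slow-path call and
//             // resume at the start of the rewritten sequence
//             ctx->uc_mcontext.pc = pc + compiler->RewriteMemAccess(pc);
//             return;
//         }
//         signal(sig, SIG_DFL); // not ours: re-raise for the default handler
//         raise(sig);
//     }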
/*
W0 - base address
X1 - stack space
W2 - values count
*/
void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc)
s64 Compiler::RewriteMemAccess(u64 pc)
{
AlignCode16();
void* res = GetRXPtr();
void* loopStart = GetRXPtr();
SUB(W2, W2, 1);
ptrdiff_t pcOffset = pc - (u64)GetRXBase();
if (preinc)
ADD(W0, W0, 4);
auto it = LoadStorePatches.find(pcOffset);
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase));
LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize));
SUB(W4, W0, W4);
CMP(W4, W5);
FixupBranch insideDTCM = B(CC_LO);
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize));
CMP(W0, W4);
FixupBranch insideITCM = B(CC_LO);
ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once
if (store)
if (it != LoadStorePatches.end())
{
LDR(X1, X1, ArithOption(X2, true));
QuickCallFunction(X4, NDS::ARM9Write32);
LoadStorePatch patch = it->second;
ABI_PopRegisters({0, 1, 2, 30});
ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc);
for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP);
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset);
LoadStorePatches.erase(it);
return patch.PatchOffset;
}
else
{
QuickCallFunction(X4, NDS::ARM9Read32);
MOV(W4, W0);
ABI_PopRegisters({0, 1, 2, 30});
STR(X4, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
SetJumpTarget(insideDTCM);
ANDI2R(W4, W4, ~3 & 0x3FFF);
ADDI2R(X4, X4, offsetof(ARMv5, DTCM));
if (store)
{
LDR(X5, X1, ArithOption(X2, true));
STR(W5, RCPU, X4);
}
else
{
LDR(W5, RCPU, X4);
STR(X5, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
SetJumpTarget(insideITCM);
ANDI2R(W4, W0, ~3 & 0x7FFF);
ADDI2R(W6, W4, offsetof(ARMv5, ITCM), W5);
if (store)
{
LDR(X5, X1, ArithOption(X2, true));
STR(W5, RCPU, X6);
}
else
{
LDR(W5, RCPU, X6);
STR(X5, X1, ArithOption(X2, true));
}
if (store)
{
ADDI2R(W4, W4, ExeMemRegionOffsets[exeMem_ITCM], W5);
LSR(W6, W4, 9);
MOVP2R(X5, CodeRanges);
ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4));
static_assert(sizeof(AddressRange) == 16);
LDRH(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length));
FixupBranch null = CBZ(W5);
ABI_PushRegisters({0, 1, 2, 4, 30});
MOV(W0, W4);
QuickCallFunction(X5, InvalidateByAddr);
ABI_PopRegisters({0, 1, 2, 4, 30});
SetJumpTarget(null);
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
return res;
printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
assert(false);
}
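// The bookkeeping RewriteMemAccess consumes is recorded by Comp_MemAccess
// below. The shape implied by its usage (field types are inferred here, so
// treat this as a sketch rather than the header's verbatim definition):
//
//     struct LoadStorePatch
//     {
//         void* PatchFunc;  // out-of-line slow-path thunk to BL to
//         s32 PatchOffset;  // start of the patchable region, relative
//                           // to the faulting instruction (<= 0)
//         u16 PatchSize;    // bytes to overwrite with BL + NOP padding
//     };
//
// so a faulting fast path like
//     AND  W1, W0, #addressMask   <- pc + PatchOffset
//     LDRH W19, [X1, X7]          <- faulting pc
// is rewritten into
//     BL   PatchFunc
//     NOP                         (PatchSize/4 - 1 filler instructions)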
void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc)
bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
AlignCode16();
void* res = GetRXPtr();
u32 localAddr = LocaliseCodeAddress(Num, addr);
void* loopStart = GetRXPtr();
SUB(W2, W2, 1);
if (preinc)
ADD(W0, W0, 4);
ABI_PushRegisters({0, 1, 2, 30});
if (store)
int invalidLiteralIdx = InvalidLiterals.Find(localAddr);
if (invalidLiteralIdx != -1)
{
LDR(X1, X1, ArithOption(X2, true));
QuickCallFunction(X4, NDS::ARM7Write32);
ABI_PopRegisters({0, 1, 2, 30});
}
else
{
QuickCallFunction(X4, NDS::ARM7Read32);
MOV(W4, W0);
ABI_PopRegisters({0, 1, 2, 30});
STR(X4, X1, ArithOption(X2, true));
InvalidLiterals.Remove(invalidLiteralIdx);
return false;
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
Comp_AddCycles_CDI();
return res;
}
void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
@@ -309,6 +85,8 @@ void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
return true;
}
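// The hunk elides the middle of Comp_MemLoadLiteral. For orientation, the
// surviving body reads the literal once on the host and materialises it in
// the mapped register, roughly as follows (a reconstructed sketch, not the
// verbatim source; sign extension for 8/16-bit loads omitted):
//
//     CurCPU->R[15] = R15;        // make the read see the right PC region
//     if (size == 32)
//     {
//         CurCPU->DataRead32(addr & ~0x3, &val);
//         val = ::ROR(val, (addr & 0x3) << 3); // ARM rotates unaligned loads
//     }
//     else if (size == 16)
//         CurCPU->DataRead16(addr & ~0x1, &val);
//     else
//         CurCPU->DataRead8(addr, &val);
//     CurCPU->R[15] = tmpR15;
//     MOVI2R(MapReg(rd), val);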
void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
@@ -318,162 +96,208 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
return;
}
if (flags & memop_Store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rnMapped = MapReg(rn);
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
{
Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr);
return;
}
if (Thumb && rn == 15)
{
ANDI2R(W3, rnMapped, ~2);
rnMapped = W3;
}
ARM64Reg finalAddr = W0;
if (flags & memop_Post)
{
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rnMapped = MapReg(rn);
finalAddr = rnMapped;
MOV(W0, rnMapped);
}
bool inlinePreparation = Num == 1;
u32 constLocalROR32 = 4;
bool addrIsStatic = Config::JIT_LiteralOptimisations
&& RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post));
u32 staticAddress;
if (addrIsStatic)
staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
void* memFunc = Num == 0
? MemFunc9[size >> 4][!!(flags & memop_Store)]
: MemFunc7[size >> 4][!!((flags & memop_Store))];
if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn))
{
u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
NDS::MemRegion region;
region.Mem = NULL;
if (Num == 0)
{
ARMv5* cpu5 = (ARMv5*)CurCPU;
// stupid dtcm...
if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
{
region.Mem = cpu5->DTCM;
region.Mask = 0x3FFF;
}
else
{
NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
}
}
else
NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
if (region.Mem != NULL)
{
void* ptr = &region.Mem[addr & addressMask & region.Mask];
MOVP2R(X0, ptr);
if (flags & memop_Store)
STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0);
else
{
LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0);
if (size == 32 && addr & ~0x3)
ROR_(rdMapped, rdMapped, (addr & 0x3) << 3);
}
return;
}
void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
if (specialFunc)
{
memFunc = specialFunc;
inlinePreparation = true;
constLocalROR32 = addr & 0x3;
}
}
ARM64Reg finalAddr = W0;
if (flags & memop_Post)
{
finalAddr = rnMapped;
MOV(W0, rnMapped);
}
if (flags & memop_Store)
MOV(W1, rdMapped);
if (!offset.IsImm)
Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
// offset might have become an immediate
if (offset.IsImm)
{
if (offset.Imm)
{
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Imm);
else
ADD(finalAddr, rnMapped, offset.Imm);
}
else if (finalAddr != rnMapped)
MOV(finalAddr, rnMapped);
}
else
{
if (offset.Reg.ShiftType == ST_ROR)
{
ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
offset = Op2(W0);
}
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
else
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
if (!(flags & memop_Post) && (flags & memop_Writeback))
MOV(rnMapped, W0);
u32 expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsMappable(expectedTarget)))
{
ptrdiff_t memopStart = GetCodeOffset();
LoadStorePatch patch;
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19]
: PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19];
assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8);
MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
// take a chance at fastmem
if (size > 8)
ANDI2R(W1, W0, addressMask);
ptrdiff_t loadStorePosition = GetCodeOffset();
if (flags & memop_Store)
{
STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7);
}
else
{
if (offset.Reg.ShiftType == ST_ROR)
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
if (size == 32)
{
ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
offset = Op2(W0);
UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0);
}
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
else
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
if (!(flags & memop_Post) && (flags & memop_Writeback))
MOV(rnMapped, W0);
patch.PatchOffset = memopStart - loadStorePosition;
patch.PatchSize = GetCodeOffset() - memopStart;
LoadStorePatches[loadStorePosition] = patch;
}
else
{
void* func = NULL;
if (addrIsStatic)
func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
if (inlinePreparation)
if (func)
{
if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4)
ANDI2R(rdMapped, W0, 3);
if (size > 8)
ANDI2R(W0, W0, addressMask);
}
QuickCallFunction(X2, memFunc);
if (!(flags & memop_Store))
{
if (inlinePreparation && !(flags & memop_Store) && size == 32)
if (flags & memop_Store)
MOV(W1, rdMapped);
QuickCallFunction(X2, (void (*)())func);
if (!(flags & memop_Store))
{
if (constLocalROR32 == 4)
if (size == 32)
{
LSL(rdMapped, rdMapped, 3);
RORV(rdMapped, W0, rdMapped);
if (staticAddress & 0x3)
ROR_(rdMapped, W0, (staticAddress & 0x3) << 3);
else
MOV(rdMapped, W0);
}
else if (constLocalROR32 > 0)
ROR_(rdMapped, W0, constLocalROR32 << 3);
else
MOV(rdMapped, W0);
}
else if (flags & memop_SignExtend)
{
if (size == 16)
SXTH(rdMapped, W0);
else if (size == 8)
SXTB(rdMapped, W0);
else
assert("What's wrong with you?");
}
else
MOV(rdMapped, W0);
if (CurInstr.Info.Branches())
{
if (size < 32)
printf("LDR size < 32 branching?\n");
Comp_JumpTo(rdMapped, Num == 0, false);
{
if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
else
{
if (Num == 0)
{
MOV(X1, RCPU);
if (flags & memop_Store)
{
MOV(W2, rdMapped);
switch (size)
{
case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
}
}
else
{
switch (size)
{
case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
}
}
}
else
{
if (flags & memop_Store)
{
MOV(W1, rdMapped);
switch (size)
{
case 32: QuickCallFunction(X3, SlowWrite7<u32>); break;
case 16: QuickCallFunction(X3, SlowWrite7<u16>); break;
case 8: QuickCallFunction(X3, SlowWrite7<u8>); break;
}
}
else
{
switch (size)
{
case 32: QuickCallFunction(X3, SlowRead7<u32>); break;
case 16: QuickCallFunction(X3, SlowRead7<u16>); break;
case 8: QuickCallFunction(X3, SlowRead7<u8>); break;
}
}
}
if (!(flags & memop_Store))
{
if (size == 32)
MOV(rdMapped, W0);
else if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
if (CurInstr.Info.Branches())
{
if (size < 32)
printf("LDR size < 32 branching?\n");
Comp_JumpTo(rdMapped, Num == 0, false);
}
}
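// Register convention for the slow path above: W0 carries the address,
// X1 the CPU pointer (ARM9 only), and the store value goes in the next
// free argument register. The helper signatures this implies (they live
// in ARMJIT_Memory; inferred from the call setup, so an assumption):
//
//     template <typename T> T    SlowRead9 (u32 addr, ARMv5* cpu);
//     template <typename T> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
//     template <typename T> T    SlowRead7 (u32 addr);
//     template <typename T> void SlowWrite7(u32 addr, T val);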
@@ -589,19 +413,11 @@ void Compiler::T_Comp_MemImmHalf()
void Compiler::T_Comp_LoadPCRel()
{
u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
u32 offset = ((CurInstr.Instr & 0xFF) << 2);
u32 addr = (R15 & ~0x2) + offset;
if (Config::JIT_LiteralOptimisations)
{
Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr);
Comp_AddCycles_CDI();
}
else
{
bool negative = addr < R15;
u32 abs = negative ? R15 - addr : addr - R15;
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0);
}
if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
}
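// Worked example for the rewrite above: the Thumb encoding 0x4902 is
// LDR r1, [pc, #8]. With R15 = 0x02000108 (pipeline-adjusted):
//     offset = (0x02 & 0xFF) << 2       = 8
//     addr   = (0x02000108 & ~0x2) + 8  = 0x02000110
// With literal optimisations on, Comp_MemLoadLiteral folds the word at
// 0x02000110 straight into r1; otherwise a plain 32-bit PC-relative load
// with immediate offset 8 is emitted.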
void Compiler::T_Comp_MemSPRel()
@@ -621,15 +437,138 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
{
int flags = 0;
if (store)
flags |= memop_Store;
if (decrement)
flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
return decrement ? -4 : 4;
}
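// Example for the single-register case above: POP {r4} is LDMIA sp!, {r4},
// so it lowers to an ordinary 32-bit load (guest register numbers shown
// for illustration):
//     Comp_MemAccess(4 /*r4*/, 13 /*sp*/, Op2(0), 32, 0);
// and the returned +4 drives the base writeback in the caller.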
if (store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
int expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
bool compileFastPath = Config::JIT_FastMemory
&& store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsMappable(expectedTarget));
if (decrement)
{
SUB(W0, MapReg(rn), regsCount * 4);
ANDI2R(W0, W0, ~3);
preinc ^= true;
}
else
{
ANDI2R(W0, MapReg(rn), ~3);
}
LoadStorePatch patch;
if (compileFastPath)
{
ptrdiff_t fastPathStart = GetCodeOffset();
ptrdiff_t firstLoadStoreOffset;
bool firstLoadStore = true;
MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
ADD(X1, X1, X0);
u32 offset = preinc ? 4 : 0;
BitSet16::Iterator it = regs.begin();
if (regsCount & 1)
{
int reg = *it;
it++;
ARM64Reg first = W3;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else if (store)
LoadReg(reg, first);
if (firstLoadStore)
{
firstLoadStoreOffset = GetCodeOffset();
firstLoadStore = false;
}
if (store)
STR(INDEX_UNSIGNED, first, X1, offset);
else
LDR(INDEX_UNSIGNED, first, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
SaveReg(reg, first);
offset += 4;
}
while (it != regs.end())
{
int reg = *it;
it++;
int nextReg = *it;
it++;
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else if (store)
LoadReg(reg, first);
if (RegCache.LoadedRegs & (1 << nextReg))
second = MapReg(nextReg);
else if (store)
LoadReg(nextReg, second);
if (firstLoadStore)
{
firstLoadStoreOffset = GetCodeOffset();
firstLoadStore = false;
}
if (store)
STP(INDEX_SIGNED, first, second, X1, offset);
else
LDP(INDEX_SIGNED, first, second, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
SaveReg(reg, first);
if (!(RegCache.LoadedRegs & (1 << nextReg)) && !store)
SaveReg(nextReg, second);
offset += 8;
}
patch.PatchSize = GetCodeOffset() - fastPathStart;
patch.PatchOffset = fastPathStart - firstLoadStoreOffset;
SwapCodeRegion();
patch.PatchFunc = GetRXPtr();
LoadStorePatches[firstLoadStoreOffset] = patch;
ABI_PushRegisters({30});
}
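// Layout produced by the fast-path setup above (an illustration, not
// emitted code): the LDP/STP sequence lives in the primary code region,
// while SwapCodeRegion redirects everything emitted from here on (the
// generic transfer below, bracketed by the LR push/pop) into the
// secondary region as the PatchFunc thunk:
//
//     primary region                      secondary region
//     firstLoadStoreOffset:               PatchFunc:
//         LDP/STP ... [X1, #off]              push LR
//         ...                                 SUB SP, copy registers,
//     (on a fault, RewriteMemAccess           call SlowBlockTransfer9/7
//      turns this into BL PatchFunc           pop LR
//      followed by NOPs)                      RET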
int i = 0;
SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
if (store)
{
Comp_AddCycles_CD();
if (usermode && (regs & BitSet16(0x7f00)))
UBFX(W0, RCPSR, 0, 5);
int i = regsCount - 1;
UBFX(W5, RCPSR, 0, 5);
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
@@ -641,7 +580,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (usermode && reg >= 8 && reg < 15)
{
if (RegCache.Mapping[reg] != INVALID_REG)
if (RegCache.LoadedRegs & (1 << reg))
MOV(W3, MapReg(reg));
else
LoadReg(reg, W3);
@@ -651,55 +590,67 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
else if (!usermode && nextReg != regs.end())
{
ARM64Reg first = W3;
ARM64Reg second = W4;
ARM64Reg first = W3, second = W4;
if (RegCache.Mapping[reg] != INVALID_REG)
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else
LoadReg(reg, W3);
if (RegCache.Mapping[*nextReg] != INVALID_REG)
if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
else
LoadReg(*nextReg, W4);
STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
i--;
i++;
it++;
}
else if (RegCache.Mapping[reg] != INVALID_REG)
else if (RegCache.LoadedRegs & (1 << reg))
{
STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
}
else
{
LoadReg(reg, W3);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
i--;
i++;
it++;
}
}
if (decrement)
{
SUB(W0, MapReg(rn), regsCount * 4);
preinc ^= true;
}
else
MOV(W0, MapReg(rn));
ADD(X1, SP, 0);
MOVI2R(W2, regsCount);
BL(Num ? MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]);
if (Num == 0)
{
MOV(X3, RCPU);
switch (preinc * 2 | store)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, false>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, true>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, false>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, true>); break;
}
}
else
{
switch (preinc * 2 | store)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, false>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, true>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, false>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, true>); break;
}
}
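// Index mapping for the switch above: bit 0 is store, bit 1 is preinc, so
// preinc * 2 | store selects SlowBlockTransfer<preinc, store>:
//     0 -> load,  post-increment (IA-style)
//     1 -> store, post-increment
//     2 -> load,  pre-increment  (IB-style)
//     3 -> store, pre-increment
// Decrementing forms were normalised earlier by lowering the base by
// regsCount * 4 and flipping preinc.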
if (!store)
{
Comp_AddCycles_CDI();
if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
UBFX(W0, RCPSR, 0, 5);
UBFX(W5, RCPSR, 0, 5);
int i = regsCount - 1;
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
@@ -714,11 +665,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
MOVI2R(W1, reg - 8);
BL(WriteBanked);
FixupBranch alreadyWritten = CBNZ(W4);
if (RegCache.Mapping[reg] != INVALID_REG)
{
if (RegCache.LoadedRegs & (1 << reg))
MOV(MapReg(reg), W3);
RegCache.DirtyRegs |= 1 << reg;
}
else
SaveReg(reg, W3);
SetJumpTarget(alreadyWritten);
@@ -727,20 +675,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
{
ARM64Reg first = W3, second = W4;
if (RegCache.Mapping[reg] != INVALID_REG)
{
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
if (reg != 15)
RegCache.DirtyRegs |= 1 << reg;
}
if (RegCache.Mapping[*nextReg] != INVALID_REG)
{
if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
if (*nextReg != 15)
RegCache.DirtyRegs |= 1 << *nextReg;
}
LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
if (first == W3)
SaveReg(reg, W3);
@@ -748,15 +688,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
SaveReg(*nextReg, W4);
it++;
i--;
i++;
}
else if (RegCache.Mapping[reg] != INVALID_REG)
else if (RegCache.LoadedRegs & (1 << reg))
{
ARM64Reg mapped = MapReg(reg);
LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
if (reg != 15)
RegCache.DirtyRegs |= 1 << reg;
}
else
{
@@ -765,11 +702,20 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
it++;
i--;
i++;
}
}
ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
if (compileFastPath)
{
ABI_PopRegisters({30});
RET();
FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr());
SwapCodeRegion();
}
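// The FlushIcacheSection call above is required because AArch64 does not
// keep the instruction cache coherent with data-side writes: freshly
// emitted code must be cleaned/invalidated before it may execute. It is
// functionally equivalent to the GCC/Clang builtin (noted for orientation,
// not code from this commit):
//
//     __builtin___clear_cache((char*)patch.PatchFunc, (char*)GetRXPtr());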
if (!store && regs[15])
{
ARM64Reg mapped = MapReg(15);