mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2025-07-29 00:59:56 -06:00
new block cache and much more...
- more reliable code invalidation detection - blocks no longer stop at every branch; branches are followed where possible to form larger blocks - idle loop recognition - optimised literal loads, load/store cycle counting, and loads/stores from constant addresses
This commit is contained in:
@ -213,7 +213,13 @@ void Compiler::A_Comp_MovOp()
|
||||
MOV(32, rd, op2);
|
||||
|
||||
if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
|
||||
{
|
||||
NOT(32, rd);
|
||||
if (op2.IsImm() && CurInstr.Cond() == 0xE)
|
||||
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm32());
|
||||
}
|
||||
else if (op2.IsImm() && CurInstr.Cond() == 0xE)
|
||||
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm32());
|
||||
|
||||
if (S)
|
||||
{
|
||||
@ -564,7 +570,13 @@ void Compiler::T_Comp_AddSub_()
|
||||
|
||||
Comp_AddCycles_C();
|
||||
|
||||
if (op & 1)
|
||||
// special case for thumb mov being alias to add rd, rn, #0
|
||||
if (CurInstr.SetFlags == 0 && rn.IsImm() && rn.Imm32() == 0)
|
||||
{
|
||||
if (rd != rs)
|
||||
MOV(32, rd, rs);
|
||||
}
|
||||
else if (op & 1)
|
||||
Comp_ArithTriOp(&Compiler::SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV);
|
||||
else
|
||||
Comp_ArithTriOp(&Compiler::ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV);
|
||||
@ -614,7 +626,7 @@ void Compiler::T_Comp_ALU()
|
||||
u32 op = (CurInstr.Instr >> 6) & 0xF;
|
||||
|
||||
if ((op >= 0x2 && op < 0x4) || op == 0x7)
|
||||
Comp_AddCycles_CI(1);
|
||||
Comp_AddCycles_CI(1); // shift by reg
|
||||
else
|
||||
Comp_AddCycles_C();
|
||||
|
||||
|
@ -16,9 +16,6 @@ int squeezePointer(T* ptr)
|
||||
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||
{
|
||||
// we can simplify constant branches by a lot
|
||||
// it's not completely safe to assume stuff like, which instructions to preload
|
||||
// we'll see how it works out
|
||||
|
||||
IrregularCycles = true;
|
||||
|
||||
u32 newPC;
|
||||
@ -39,18 +36,12 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||
{
|
||||
ARMv5* cpu9 = (ARMv5*)CurCPU;
|
||||
|
||||
u32 oldregion = R15 >> 24;
|
||||
u32 newregion = addr >> 24;
|
||||
|
||||
u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
|
||||
u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
|
||||
cpu9->RegionCodeCycles = regionCodeCycles;
|
||||
|
||||
MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
|
||||
|
||||
bool setupRegion = newregion != oldregion;
|
||||
if (setupRegion)
|
||||
cpu9->SetupCodeMem(addr);
|
||||
if (Exit)
|
||||
MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
|
||||
|
||||
if (addr & 0x1)
|
||||
{
|
||||
@ -83,12 +74,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||
cycles += cpu9->CodeCycles;
|
||||
}
|
||||
|
||||
MOV(64, MDisp(RCPU, offsetof(ARM, CodeMem.Mem)), Imm32(squeezePointer(cpu9->CodeMem.Mem)));
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeMem.Mask)), Imm32(cpu9->CodeMem.Mask));
|
||||
|
||||
cpu9->RegionCodeCycles = compileTimeCodeCycles;
|
||||
if (setupRegion)
|
||||
cpu9->SetupCodeMem(R15);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -100,8 +86,11 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||
cpu7->CodeRegion = codeRegion;
|
||||
cpu7->CodeCycles = codeCycles;
|
||||
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
|
||||
if (Exit)
|
||||
{
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
|
||||
}
|
||||
|
||||
if (addr & 0x1)
|
||||
{
|
||||
@ -133,7 +122,8 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||
cpu7->CodeCycles = addr >> 15;
|
||||
}
|
||||
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
|
||||
if (Exit)
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
|
||||
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
|
||||
ConstantCycles += cycles;
|
||||
else
|
||||
@ -219,10 +209,23 @@ void Compiler::T_Comp_BCOND()
|
||||
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
|
||||
Comp_JumpTo(R15 + offset + 1, true);
|
||||
|
||||
Comp_SpecialBranchBehaviour();
|
||||
|
||||
FixupBranch skipFailed = J();
|
||||
SetJumpTarget(skipExecute);
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondTaken)
|
||||
{
|
||||
RegCache.PrepareExit();
|
||||
SaveCPSR(false);
|
||||
|
||||
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
}
|
||||
|
||||
Comp_AddCycles_C(true);
|
||||
SetJumpTarget(skipFailed);
|
||||
SetJumpTarget(skipFailed);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_B()
|
||||
|
@ -72,12 +72,15 @@ Compiler::Compiler()
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
for (int j = 0; j < 2; j++)
|
||||
{
|
||||
MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i);
|
||||
MemoryFuncs7[i][j][0] = Gen_MemoryRoutine7(j, false, 8 << i);
|
||||
MemoryFuncs7[i][j][1] = Gen_MemoryRoutine7(j, true, 8 << i);
|
||||
}
|
||||
}
|
||||
MemoryFuncs7[0][0] = (void*)NDS::ARM7Read8;
|
||||
MemoryFuncs7[0][1] = (void*)NDS::ARM7Write8;
|
||||
MemoryFuncs7[1][0] = (void*)NDS::ARM7Read16;
|
||||
MemoryFuncs7[1][1] = (void*)NDS::ARM7Write16;
|
||||
MemoryFuncs7[2][0] = (void*)NDS::ARM7Read32;
|
||||
MemoryFuncs7[2][1] = (void*)NDS::ARM7Write32;
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
for (int j = 0; j < 2; j++)
|
||||
{
|
||||
@ -179,12 +182,13 @@ void Compiler::LoadCPSR()
|
||||
MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR)));
|
||||
}
|
||||
|
||||
void Compiler::SaveCPSR()
|
||||
void Compiler::SaveCPSR(bool flagClean)
|
||||
{
|
||||
if (CPSRDirty)
|
||||
{
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR));
|
||||
CPSRDirty = false;
|
||||
if (flagClean)
|
||||
CPSRDirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -204,6 +208,9 @@ void Compiler::SaveReg(int reg, X64Reg nativeReg)
|
||||
// invalidates RSCRATCH and RSCRATCH3
|
||||
Gen::FixupBranch Compiler::CheckCondition(u32 cond)
|
||||
{
|
||||
// hack, ldm/stm can get really big TODO: make this better
|
||||
bool ldmStm = !Thumb &&
|
||||
(CurInstr.Info.Kind == ARMInstrInfo::ak_LDM || CurInstr.Info.Kind == ARMInstrInfo::ak_STM);
|
||||
if (cond >= 0x8)
|
||||
{
|
||||
static_assert(RSCRATCH3 == ECX, "RSCRATCH has to be equal to ECX!");
|
||||
@ -213,14 +220,14 @@ Gen::FixupBranch Compiler::CheckCondition(u32 cond)
|
||||
SHL(32, R(RSCRATCH), R(RSCRATCH3));
|
||||
TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
|
||||
|
||||
return J_CC(CC_Z);
|
||||
return J_CC(CC_Z, ldmStm);
|
||||
}
|
||||
else
|
||||
{
|
||||
// could have used a LUT, but then where would be the fun?
|
||||
TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
|
||||
|
||||
return J_CC(cond & 1 ? CC_NZ : CC_Z);
|
||||
return J_CC(cond & 1 ? CC_NZ : CC_Z, ldmStm);
|
||||
}
|
||||
}
|
||||
|
||||
@ -354,25 +361,34 @@ void Compiler::Reset()
|
||||
SetCodePtr(ResetStart);
|
||||
}
|
||||
|
||||
CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount)
|
||||
void Compiler::Comp_SpecialBranchBehaviour()
|
||||
{
|
||||
if (CurInstr.BranchFlags & branch_IdleBranch)
|
||||
OR(32, MDisp(RCPU, offsetof(ARM, Halted)), Imm8(0x20));
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
|
||||
{
|
||||
RegCache.PrepareExit();
|
||||
SaveCPSR(false);
|
||||
|
||||
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
}
|
||||
}
|
||||
|
||||
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
|
||||
{
|
||||
if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
|
||||
InvalidateBlockCache();
|
||||
ResetBlockCache();
|
||||
|
||||
ConstantCycles = 0;
|
||||
Thumb = cpu->CPSR & 0x20;
|
||||
Thumb = thumb;
|
||||
Num = cpu->Num;
|
||||
CodeRegion = cpu->CodeRegion;
|
||||
CodeRegion = instrs[0].Addr >> 24;
|
||||
CurCPU = cpu;
|
||||
|
||||
CompiledBlock res = (CompiledBlock)GetWritableCodePtr();
|
||||
|
||||
if (!(Num == 0
|
||||
? IsMapped<0>(instrs[0].Addr - (Thumb ? 2 : 4))
|
||||
: IsMapped<1>(instrs[0].Addr - (Thumb ? 2 : 4))))
|
||||
{
|
||||
printf("Trying to compile a block in unmapped memory\n");
|
||||
}
|
||||
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
|
||||
@ -380,7 +396,6 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||
|
||||
LoadCPSR();
|
||||
|
||||
// TODO: this is ugly as a whole, do better
|
||||
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
|
||||
|
||||
for (int i = 0; i < instrsCount; i++)
|
||||
@ -388,21 +403,25 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||
CurInstr = instrs[i];
|
||||
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
|
||||
|
||||
Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
|
||||
|
||||
CompileFunc comp = Thumb
|
||||
? T_Comp[CurInstr.Info.Kind]
|
||||
: A_Comp[CurInstr.Info.Kind];
|
||||
|
||||
bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
|
||||
if (comp == NULL || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
|
||||
if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
|
||||
{
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
|
||||
|
||||
if (comp == NULL)
|
||||
{
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
|
||||
MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
|
||||
|
||||
SaveCPSR();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (comp != NULL)
|
||||
RegCache.Prepare(Thumb, i);
|
||||
else
|
||||
@ -410,12 +429,11 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||
|
||||
if (Thumb)
|
||||
{
|
||||
u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
|
||||
if (comp == NULL)
|
||||
{
|
||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||
|
||||
ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
|
||||
ABI_CallFunction(InterpretTHUMB[CurInstr.Info.Kind]);
|
||||
}
|
||||
else
|
||||
(this->*comp)();
|
||||
@ -434,7 +452,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||
}
|
||||
}
|
||||
else if (cond == 0xF)
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
}
|
||||
else
|
||||
{
|
||||
IrregularCycles = false;
|
||||
@ -443,25 +463,36 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||
if (cond < 0xE)
|
||||
skipExecute = CheckCondition(cond);
|
||||
|
||||
u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0);
|
||||
if (comp == NULL)
|
||||
{
|
||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||
|
||||
ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]);
|
||||
ABI_CallFunction(InterpretARM[CurInstr.Info.Kind]);
|
||||
}
|
||||
else
|
||||
(this->*comp)();
|
||||
|
||||
Comp_SpecialBranchBehaviour();
|
||||
|
||||
if (CurInstr.Cond() < 0xE)
|
||||
{
|
||||
if (IrregularCycles)
|
||||
if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
|
||||
{
|
||||
FixupBranch skipFailed = J();
|
||||
SetJumpTarget(skipExecute);
|
||||
|
||||
Comp_AddCycles_C(true);
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondTaken)
|
||||
{
|
||||
RegCache.PrepareExit();
|
||||
SaveCPSR(false);
|
||||
|
||||
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
}
|
||||
|
||||
SetJumpTarget(skipFailed);
|
||||
}
|
||||
else
|
||||
@ -483,6 +514,12 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
|
||||
/*FILE* codeout = fopen("codeout", "a");
|
||||
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
|
||||
fwrite((u8*)res, GetWritableCodePtr() - (u8*)res, 1, codeout);
|
||||
|
||||
fclose(codeout);*/
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -528,4 +565,89 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::Comp_AddCycles_CDI()
|
||||
{
|
||||
if (Num == 0)
|
||||
Comp_AddCycles_CD();
|
||||
else
|
||||
{
|
||||
IrregularCycles = true;
|
||||
|
||||
s32 cycles;
|
||||
|
||||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
if (CurInstr.DataRegion == 0x02) // mainRAM
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
cycles = numC + numD;
|
||||
else
|
||||
{
|
||||
numC++;
|
||||
cycles = std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
}
|
||||
else if (CodeRegion == 0x02)
|
||||
{
|
||||
numD++;
|
||||
cycles = std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
else
|
||||
{
|
||||
cycles = numC + numD + 1;
|
||||
}
|
||||
|
||||
printf("%x: %d %d cycles cdi (%d)\n", CurInstr.Instr, Num, CurInstr.DataCycles, cycles);
|
||||
|
||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::Comp_AddCycles_CD()
|
||||
{
|
||||
u32 cycles = 0;
|
||||
if (Num == 0)
|
||||
{
|
||||
s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
//if (DataRegion != CodeRegion)
|
||||
cycles = std::max(numC + numD - 6, std::max(numC, numD));
|
||||
|
||||
IrregularCycles = cycles != numC;
|
||||
}
|
||||
else
|
||||
{
|
||||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
if (CurInstr.DataRegion == 0x02)
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
cycles += numC + numD;
|
||||
else
|
||||
cycles += std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
else if (CodeRegion == 0x02)
|
||||
{
|
||||
cycles += std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
else
|
||||
{
|
||||
cycles += numC + numD;
|
||||
}
|
||||
|
||||
IrregularCycles = true;
|
||||
}
|
||||
|
||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
|
||||
}
|
@ -4,6 +4,7 @@
|
||||
#include "../dolphin/x64Emitter.h"
|
||||
|
||||
#include "../ARMJIT.h"
|
||||
#include "../ARMJIT_Internal.h"
|
||||
#include "../ARMJIT_RegisterCache.h"
|
||||
|
||||
namespace ARMJIT
|
||||
@ -16,6 +17,32 @@ const Gen::X64Reg RSCRATCH = Gen::EAX;
|
||||
const Gen::X64Reg RSCRATCH2 = Gen::EDX;
|
||||
const Gen::X64Reg RSCRATCH3 = Gen::ECX;
|
||||
|
||||
struct ComplexOperand
|
||||
{
|
||||
ComplexOperand()
|
||||
{}
|
||||
|
||||
ComplexOperand(u32 imm)
|
||||
: IsImm(true), Imm(imm)
|
||||
{}
|
||||
ComplexOperand(int reg, int op, int amount)
|
||||
: IsImm(false)
|
||||
{
|
||||
Reg.Reg = reg;
|
||||
Reg.Op = op;
|
||||
Reg.Amount = amount;
|
||||
}
|
||||
|
||||
bool IsImm;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
int Reg, Op, Amount;
|
||||
} Reg;
|
||||
u32 Imm;
|
||||
};
|
||||
};
|
||||
|
||||
class Compiler : public Gen::XEmitter
|
||||
{
|
||||
@ -24,7 +51,7 @@ public:
|
||||
|
||||
void Reset();
|
||||
|
||||
CompiledBlock CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount);
|
||||
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
|
||||
|
||||
void LoadReg(int reg, Gen::X64Reg nativeReg);
|
||||
void SaveReg(int reg, Gen::X64Reg nativeReg);
|
||||
@ -39,6 +66,8 @@ public:
|
||||
void Comp_AddCycles_C(bool forceNonConstant = false);
|
||||
void Comp_AddCycles_CI(u32 i);
|
||||
void Comp_AddCycles_CI(Gen::X64Reg i, int add);
|
||||
void Comp_AddCycles_CDI();
|
||||
void Comp_AddCycles_CD();
|
||||
|
||||
enum
|
||||
{
|
||||
@ -92,8 +121,17 @@ public:
|
||||
void T_Comp_BL_LONG_2();
|
||||
void T_Comp_BL_Merged();
|
||||
|
||||
void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
|
||||
enum
|
||||
{
|
||||
memop_Writeback = 1 << 0,
|
||||
memop_Post = 1 << 1,
|
||||
memop_SignExtend = 1 << 2,
|
||||
memop_Store = 1 << 3,
|
||||
memop_SubtractOffset = 1 << 4
|
||||
};
|
||||
void Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags);
|
||||
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
|
||||
void Comp_MemLoadLiteral(int size, int rd, u32 addr);
|
||||
|
||||
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
|
||||
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
|
||||
@ -105,8 +143,9 @@ public:
|
||||
|
||||
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
|
||||
|
||||
void Comp_SpecialBranchBehaviour();
|
||||
|
||||
void* Gen_MemoryRoutine9(bool store, int size);
|
||||
void* Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size);
|
||||
|
||||
void* Gen_MemoryRoutineSeq9(bool store, bool preinc);
|
||||
void* Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM);
|
||||
@ -117,10 +156,9 @@ public:
|
||||
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
|
||||
|
||||
Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed);
|
||||
Gen::OpArg A_Comp_GetMemWBOffset();
|
||||
|
||||
void LoadCPSR();
|
||||
void SaveCPSR();
|
||||
void SaveCPSR(bool flagClean = true);
|
||||
|
||||
bool FlagsNZRequired()
|
||||
{ return CurInstr.SetFlags & 0xC; }
|
||||
@ -139,10 +177,11 @@ public:
|
||||
u8* ResetStart;
|
||||
u32 CodeMemSize;
|
||||
|
||||
bool Exit;
|
||||
bool IrregularCycles;
|
||||
|
||||
void* MemoryFuncs9[3][2];
|
||||
void* MemoryFuncs7[3][2][2];
|
||||
void* MemoryFuncs7[3][2];
|
||||
|
||||
void* MemoryFuncsSeq9[2][2];
|
||||
void* MemoryFuncsSeq7[2][2][2];
|
||||
|
@ -27,51 +27,7 @@ int squeezePointer(T* ptr)
|
||||
/*
|
||||
address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows)
|
||||
store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
|
||||
code cycles - ABI_PARAM3
|
||||
*/
|
||||
|
||||
// Emits code which adds max(numC + numD - 6, numC, numD) to ARM::Cycles.
// numC, numD and scratch are registers; numD and scratch are overwritten by
// the emitted code.
#define CALC_CYCLES_9(numC, numD, scratch) \
|
||||
LEA(32, scratch, MComplex(numD, numC, SCALE_1, -6)); \
|
||||
CMP(32, R(numC), R(numD)); \
|
||||
CMOVcc(32, numD, R(numC), CC_G); \
|
||||
CMP(32, R(numD), R(scratch)); \
|
||||
CMOVcc(32, scratch, R(numD), CC_G); \
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch));
|
||||
// Emits the cycle accounting for a data access which hit the main RAM check.
// Relies on `codeMainRAM` and `store` being in scope at the point of use
// (both are evaluated when the routine is generated, not by the emitted code).
//  - codeMainRAM:  adds numC + numD to ARM::Cycles
//  - otherwise:    for loads numC is incremented first, then
//                  max(numC + numD - 3, numC, numD) is added to ARM::Cycles
// numC and scratch are overwritten by the emitted code.
#define CALC_CYCLES_7_DATA_MAIN_RAM(numC, numD, scratch) \
|
||||
if (codeMainRAM) \
|
||||
{ \
|
||||
LEA(32, scratch, MRegSum(numD, numC)); \
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if (!store) \
|
||||
ADD(32, R(numC), Imm8(1)); \
|
||||
LEA(32, scratch, MComplex(numD, numC, SCALE_1, -3)); \
|
||||
CMP(32, R(numD), R(numC)); \
|
||||
CMOVcc(32, numC, R(numD), CC_G); \
|
||||
CMP(32, R(numC), R(scratch)); \
|
||||
CMOVcc(32, scratch, R(numC), CC_G); \
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||
}
|
||||
// Emits the cycle accounting for a data access which failed the main RAM
// check. Relies on `codeMainRAM` and `store` being in scope at the point of
// use (both are evaluated when the routine is generated).
//  - codeMainRAM:  for loads numD is incremented first, then
//                  max(numC + numD - 3, numC, numD) is added to ARM::Cycles
//  - otherwise:    adds numC + numD (+ 1 for loads) to ARM::Cycles
// numC, numD and scratch may be overwritten by the emitted code.
#define CALC_CYCLES_7_DATA_NON_MAIN_RAM(numC, numD, scratch) \
|
||||
if (codeMainRAM) \
|
||||
{ \
|
||||
if (!store) \
|
||||
ADD(32, R(numD), Imm8(1)); \
|
||||
LEA(32, scratch, MComplex(numD, numC, SCALE_1, -3)); \
|
||||
CMP(32, R(numD), R(numC)); \
|
||||
CMOVcc(32, numC, R(numD), CC_G); \
|
||||
CMP(32, R(numC), R(scratch)); \
|
||||
CMOVcc(32, scratch, R(numC), CC_G); \
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
LEA(32, scratch, MComplex(numD, numC, SCALE_1, store ? 0 : 1)); \
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||
}
|
||||
|
||||
void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||
{
|
||||
u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
|
||||
@ -86,12 +42,6 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||
CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
|
||||
FixupBranch insideITCM = J_CC(CC_B);
|
||||
|
||||
// cycle counting!
|
||||
MOV(32, R(ABI_PARAM4), R(ABI_PARAM1));
|
||||
SHR(32, R(ABI_PARAM4), Imm8(12));
|
||||
MOVZX(32, 8, ABI_PARAM4, MComplex(RCPU, ABI_PARAM4, SCALE_4, offsetof(ARMv5, MemTimings) + (size == 32 ? 2 : 1)));
|
||||
CALC_CYCLES_9(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
|
||||
|
||||
if (store)
|
||||
{
|
||||
if (size > 8)
|
||||
@ -127,7 +77,6 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||
}
|
||||
|
||||
SetJumpTarget(insideDTCM);
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM3));
|
||||
AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
|
||||
if (store)
|
||||
MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
|
||||
@ -146,16 +95,22 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideITCM);
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM3));
|
||||
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
|
||||
AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
|
||||
if (store)
|
||||
{
|
||||
MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2));
|
||||
XOR(32, R(RSCRATCH), R(RSCRATCH));
|
||||
MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(RSCRATCH));
|
||||
if (size == 32)
|
||||
MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(RSCRATCH));
|
||||
|
||||
// if CodeRanges[pseudoPhysical/256].Blocks.Length > 0 we're writing into code!
|
||||
static_assert(sizeof(AddressRange) == 16);
|
||||
LEA(32, ABI_PARAM1, MDisp(ABI_PARAM3, ExeMemRegionOffsets[exeMem_ITCM]));
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SHR(32, R(RSCRATCH), Imm8(8));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
FixupBranch noCode = J_CC(CC_Z);
|
||||
JMP((u8*)InvalidateByAddr, true);
|
||||
SetJumpTarget(noCode);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -176,83 +131,6 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||
return res;
|
||||
}
|
||||
|
||||
// Generates a memory access routine and returns a pointer to its start.
//
// store       - true generates a write routine, false a read routine
// codeMainRAM - consumed by the CALC_CYCLES_7_* macros to select the cycle formula
// size        - access width in bits (32, 16 or 8)
//
// The emitted routine takes the address in ABI_PARAM1, the value to store in
// ABI_PARAM2 and the code cycles in ABI_PARAM3; the inline load paths leave
// the loaded value in RSCRATCH.
void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
|
||||
{
|
||||
// mask which aligns the address down to the access size
u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
|
||||
AlignCode4();
|
||||
void* res = GetWritableCodePtr();
|
||||
|
||||
// look up the data access timing: NDS::ARM7MemTimings is indexed by
// address >> 15, with 4 bytes per entry
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SHR(32, R(RSCRATCH), Imm8(15));
|
||||
MOVZX(32, 8, ABI_PARAM4, MScaled(RSCRATCH, SCALE_4, (size == 32 ? 2 : 0) + squeezePointer(NDS::ARM7MemTimings)));
|
||||
|
||||
// addresses whose top byte is 0x02 are handled inline as main RAM
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
AND(32, R(RSCRATCH), Imm32(0xFF000000));
|
||||
CMP(32, R(RSCRATCH), Imm32(0x02000000));
|
||||
FixupBranch outsideMainRAM = J_CC(CC_NE);
|
||||
CALC_CYCLES_7_DATA_MAIN_RAM(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
|
||||
// ABI_PARAM3 = aligned offset into main RAM
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1));
|
||||
AND(32, R(ABI_PARAM3), Imm32((MAIN_RAM_SIZE - 1) & addressMask));
|
||||
if (store)
|
||||
{
|
||||
MOV(size, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)), R(ABI_PARAM2));
|
||||
// zero the 64 bit entries at cache.MainRAM + offset * 4
// NOTE(review): presumably this drops the cached blocks for the written
// location - confirm against the block cache layout
XOR(32, R(RSCRATCH), R(RSCRATCH));
|
||||
MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM)), R(RSCRATCH));
|
||||
if (size == 32)
|
||||
MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM) + 8), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVZX(32, size, RSCRATCH, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)));
|
||||
if (size == 32)
|
||||
{
|
||||
// rotate the loaded word right by (address & 3) * 8 bits
if (ABI_PARAM1 != ECX)
|
||||
MOV(32, R(ECX), R(ABI_PARAM1));
|
||||
AND(32, R(ECX), Imm8(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
ROR_(32, R(RSCRATCH), R(ECX));
|
||||
}
|
||||
}
|
||||
RET();
|
||||
|
||||
// everything else goes through the NDS::ARM7Read*/ARM7Write* functions
SetJumpTarget(outsideMainRAM);
|
||||
CALC_CYCLES_7_DATA_NON_MAIN_RAM(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
|
||||
if (store)
|
||||
{
|
||||
if (size > 8)
|
||||
AND(32, R(ABI_PARAM1), Imm32(addressMask));
|
||||
// jump (not call) to the write function
switch (size)
|
||||
{
|
||||
case 32: JMP((u8*)NDS::ARM7Write32, true); break;
|
||||
case 16: JMP((u8*)NDS::ARM7Write16, true); break;
|
||||
case 8: JMP((u8*)NDS::ARM7Write8, true); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (size == 32)
|
||||
{
|
||||
// the unaligned address is pushed before the call and popped into ECX
// afterwards to rotate the result by (address & 3) * 8 bits
ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
|
||||
AND(32, R(ABI_PARAM1), Imm32(addressMask));
|
||||
ABI_CallFunction(NDS::ARM7Read32);
|
||||
ABI_PopRegistersAndAdjustStack({ECX}, 8);
|
||||
AND(32, R(ECX), Imm8(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
ROR_(32, R(RSCRATCH), R(ECX));
|
||||
RET();
|
||||
}
|
||||
else if (size == 16)
|
||||
{
|
||||
AND(32, R(ABI_PARAM1), Imm32(addressMask));
|
||||
JMP((u8*)NDS::ARM7Read16, true);
|
||||
}
|
||||
else
|
||||
JMP((u8*)NDS::ARM7Read8, true);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#define MEMORY_SEQ_WHILE_COND \
|
||||
if (!store) \
|
||||
MOV(32, currentElement, R(EAX));\
|
||||
@ -266,24 +144,13 @@ void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
|
||||
ABI_PARAM1 address
|
||||
ABI_PARAM2 address where registers are stored
|
||||
ABI_PARAM3 how many values to read/write
|
||||
ABI_PARAM4 code cycles
|
||||
|
||||
Dolphin x64CodeEmitter is my favourite assembler
|
||||
*/
|
||||
void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||
{
|
||||
const u8* zero = GetCodePtr();
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM4));
|
||||
RET();
|
||||
|
||||
void* res = (void*)GetWritableCodePtr();
|
||||
|
||||
TEST(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
||||
J_CC(CC_Z, zero);
|
||||
|
||||
PUSH(ABI_PARAM3);
|
||||
PUSH(ABI_PARAM4); // we need you later
|
||||
|
||||
const u8* repeat = GetCodePtr();
|
||||
|
||||
if (preinc)
|
||||
@ -311,12 +178,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SHR(32, R(RSCRATCH), Imm8(12));
|
||||
MOVZX(32, 8, ABI_PARAM2, MComplex(RCPU, RSCRATCH, SCALE_4, 2 + offsetof(ARMv5, MemTimings)));
|
||||
MOVZX(32, 8, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_4, 3 + offsetof(ARMv5, MemTimings)));
|
||||
|
||||
FixupBranch finishIt1 = J();
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideDTCM);
|
||||
AND(32, R(RSCRATCH), Imm32(0x3FFF & ~3));
|
||||
@ -329,9 +191,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
MOV(32, R(RSCRATCH), Imm32(1)); // sequential access time
|
||||
MOV(32, R(ABI_PARAM2), Imm32(1)); // non sequential
|
||||
FixupBranch finishIt2 = J();
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideITCM);
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
@ -340,31 +200,23 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM4), currentElement);
|
||||
MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM4));
|
||||
XOR(32, R(ABI_PARAM4), R(ABI_PARAM4));
|
||||
MOV(64, MScaled(RSCRATCH, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(ABI_PARAM4));
|
||||
MOV(64, MScaled(RSCRATCH, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(ABI_PARAM4));
|
||||
|
||||
ADD(32, R(RSCRATCH), Imm32(ExeMemRegionOffsets[exeMem_ITCM]));
|
||||
MOV(32, R(ABI_PARAM4), R(RSCRATCH));
|
||||
SHR(32, R(RSCRATCH), Imm8(8));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
FixupBranch noCode = J_CC(CC_Z);
|
||||
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
|
||||
CALL((u8*)InvalidateByAddr);
|
||||
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
SetJumpTarget(noCode);
|
||||
}
|
||||
else
|
||||
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)));
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
MOV(32, R(RSCRATCH), Imm32(1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(1));
|
||||
|
||||
SetJumpTarget(finishIt1);
|
||||
SetJumpTarget(finishIt2);
|
||||
|
||||
POP(ABI_PARAM4);
|
||||
POP(ABI_PARAM3);
|
||||
|
||||
CMP(32, R(ABI_PARAM3), Imm8(1));
|
||||
FixupBranch skipSequential = J_CC(CC_E);
|
||||
SUB(32, R(ABI_PARAM3), Imm8(1));
|
||||
IMUL(32, RSCRATCH, R(ABI_PARAM3));
|
||||
ADD(32, R(ABI_PARAM2), R(RSCRATCH));
|
||||
SetJumpTarget(skipSequential);
|
||||
|
||||
CALC_CYCLES_9(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
|
||||
RET();
|
||||
|
||||
return res;
|
||||
@ -372,18 +224,8 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||
|
||||
void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
|
||||
{
|
||||
const u8* zero = GetCodePtr();
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM4));
|
||||
RET();
|
||||
|
||||
void* res = (void*)GetWritableCodePtr();
|
||||
|
||||
TEST(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
||||
J_CC(CC_Z, zero);
|
||||
|
||||
PUSH(ABI_PARAM3);
|
||||
PUSH(ABI_PARAM4); // we need you later
|
||||
|
||||
const u8* repeat = GetCodePtr();
|
||||
|
||||
if (preinc)
|
||||
@ -403,59 +245,227 @@ void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
|
||||
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SHR(32, R(RSCRATCH), Imm8(15));
|
||||
MOVZX(32, 8, ABI_PARAM2, MScaled(RSCRATCH, SCALE_4, 2 + squeezePointer(NDS::ARM7MemTimings)));
|
||||
MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_4, 3 + squeezePointer(NDS::ARM7MemTimings)));
|
||||
|
||||
POP(ABI_PARAM4);
|
||||
POP(ABI_PARAM3);
|
||||
|
||||
// TODO: optimise this
|
||||
CMP(32, R(ABI_PARAM3), Imm8(1));
|
||||
FixupBranch skipSequential = J_CC(CC_E);
|
||||
SUB(32, R(ABI_PARAM3), Imm8(1));
|
||||
IMUL(32, RSCRATCH, R(ABI_PARAM3));
|
||||
ADD(32, R(ABI_PARAM2), R(RSCRATCH));
|
||||
SetJumpTarget(skipSequential);
|
||||
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
AND(32, R(RSCRATCH), Imm32(0xFF000000));
|
||||
CMP(32, R(RSCRATCH), Imm32(0x02000000));
|
||||
FixupBranch outsideMainRAM = J_CC(CC_NE);
|
||||
CALC_CYCLES_7_DATA_MAIN_RAM(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
|
||||
RET();
|
||||
|
||||
SetJumpTarget(outsideMainRAM);
|
||||
CALC_CYCLES_7_DATA_NON_MAIN_RAM(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
|
||||
RET();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#undef CALC_CYCLES_9
|
||||
#undef MEMORY_SEQ_WHILE_COND
|
||||
|
||||
void Compiler::Comp_MemAccess(OpArg rd, bool signExtend, bool store, int size)
|
||||
void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
|
||||
{
|
||||
IrregularCycles = true;
|
||||
|
||||
if (store)
|
||||
MOV(32, R(ABI_PARAM2), rd);
|
||||
u32 cycles = Num
|
||||
? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
|
||||
: (R15 & 0x2 ? 0 : CurInstr.CodeCycles);
|
||||
MOV(32, R(ABI_PARAM3), Imm32(cycles));
|
||||
CALL(Num == 0
|
||||
? MemoryFuncs9[size >> 4][store]
|
||||
: MemoryFuncs7[size >> 4][store][CodeRegion == 0x02]);
|
||||
|
||||
if (!store)
|
||||
u32 val;
|
||||
// make sure arm7 bios is accessible
|
||||
u32 tmpR15 = CurCPU->R[15];
|
||||
CurCPU->R[15] = R15;
|
||||
if (size == 32)
|
||||
{
|
||||
if (signExtend)
|
||||
MOVSX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
|
||||
CurCPU->DataRead32(addr & ~0x3, &val);
|
||||
val = ROR(val, (addr & 0x3) << 3);
|
||||
}
|
||||
else if (size == 16)
|
||||
CurCPU->DataRead16(addr & ~0x1, &val);
|
||||
else
|
||||
CurCPU->DataRead8(addr, &val);
|
||||
CurCPU->R[15] = tmpR15;
|
||||
|
||||
MOV(32, MapReg(rd), Imm32(val));
|
||||
|
||||
if (Thumb || CurInstr.Cond() == 0xE)
|
||||
RegCache.PutLiteral(rd, val);
|
||||
|
||||
Comp_AddCycles_CDI();
|
||||
}
|
||||
|
||||
// Debug helper: prints the two values it is given, in hex, after a fixed
// "actually not static!" prefix. The only reference to it in this listing is
// the commented-out address check inside Comp_MemAccess.
void fault(u32 a, u32 b)
|
||||
{
|
||||
printf("actually not static! %x %x\n", a, b);
|
||||
}
|
||||
|
||||
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
|
||||
{
|
||||
if (flags & memop_Store)
|
||||
{
|
||||
Comp_AddCycles_CD();
|
||||
}
|
||||
else
|
||||
{
|
||||
Comp_AddCycles_CDI();
|
||||
}
|
||||
|
||||
u32 addressMask = ~0;
|
||||
if (size == 32)
|
||||
addressMask = ~3;
|
||||
if (size == 16)
|
||||
addressMask = ~1;
|
||||
|
||||
if (rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
|
||||
{
|
||||
Comp_MemLoadLiteral(size, rd,
|
||||
R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
OpArg rdMapped = MapReg(rd);
|
||||
OpArg rnMapped = MapReg(rn);
|
||||
|
||||
bool inlinePreparation = Num == 1;
|
||||
u32 constLocalROR32 = 4;
|
||||
|
||||
void* memoryFunc = Num == 0
|
||||
? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
|
||||
: MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
|
||||
|
||||
if ((rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
|
||||
{
|
||||
u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||
|
||||
/*MOV(32, R(ABI_PARAM1), Imm32(CurInstr.Instr));
|
||||
MOV(32, R(ABI_PARAM1), Imm32(R15));
|
||||
MOV_sum(32, RSCRATCH, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
|
||||
CMP(32, R(RSCRATCH), Imm32(addr));
|
||||
FixupBranch eq = J_CC(CC_E);
|
||||
CALL((void*)fault);
|
||||
SetJumpTarget(eq);*/
|
||||
|
||||
NDS::MemRegion region;
|
||||
region.Mem = NULL;
|
||||
if (Num == 0)
|
||||
{
|
||||
ARMv5* cpu5 = (ARMv5*)CurCPU;
|
||||
|
||||
// stupid dtcm...
|
||||
if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
|
||||
{
|
||||
region.Mem = cpu5->DTCM;
|
||||
region.Mask = 0x3FFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
|
||||
}
|
||||
}
|
||||
else
|
||||
NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
|
||||
|
||||
if (region.Mem != NULL)
|
||||
{
|
||||
void* ptr = &region.Mem[addr & addressMask & region.Mask];
|
||||
|
||||
if (flags & memop_Store)
|
||||
{
|
||||
MOV(size, M(ptr), MapReg(rd));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), M(ptr));
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), M(ptr));
|
||||
|
||||
if (size == 32 && addr & ~0x3)
|
||||
{
|
||||
ROR_(32, rdMapped, Imm8((addr & 0x3) << 3));
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
|
||||
if (specialFunc)
|
||||
{
|
||||
memoryFunc = specialFunc;
|
||||
inlinePreparation = true;
|
||||
constLocalROR32 = addr & 0x3;
|
||||
}
|
||||
}
|
||||
|
||||
X64Reg finalAddr = ABI_PARAM1;
|
||||
if (flags & memop_Post)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM1), rnMapped);
|
||||
|
||||
finalAddr = rnMapped.GetSimpleReg();
|
||||
}
|
||||
|
||||
if (op2.IsImm)
|
||||
{
|
||||
MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
|
||||
}
|
||||
else
|
||||
MOVZX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
|
||||
{
|
||||
OpArg rm = MapReg(op2.Reg.Reg);
|
||||
|
||||
if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
|
||||
&& op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
|
||||
{
|
||||
LEA(32, finalAddr,
|
||||
MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
bool throwAway;
|
||||
OpArg offset =
|
||||
Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
|
||||
|
||||
if (flags & memop_SubtractOffset)
|
||||
{
|
||||
MOV(32, R(finalAddr), rnMapped);
|
||||
if (!offset.IsZero())
|
||||
SUB(32, R(finalAddr), offset);
|
||||
}
|
||||
else
|
||||
MOV_sum(32, finalAddr, rnMapped, offset);
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & memop_Writeback) && !(flags & memop_Post))
|
||||
MOV(32, rnMapped, R(finalAddr));
|
||||
|
||||
if (flags & memop_Store)
|
||||
MOV(32, R(ABI_PARAM2), rdMapped);
|
||||
|
||||
if (!(flags & memop_Store) && inlinePreparation && constLocalROR32 == 4 && size == 32)
|
||||
MOV(32, rdMapped, R(ABI_PARAM1));
|
||||
|
||||
if (inlinePreparation && size > 8)
|
||||
AND(32, R(ABI_PARAM1), Imm8(addressMask));
|
||||
|
||||
CALL(memoryFunc);
|
||||
|
||||
if (!(flags & memop_Store))
|
||||
{
|
||||
if (inlinePreparation && size == 32)
|
||||
{
|
||||
if (constLocalROR32 == 4)
|
||||
{
|
||||
static_assert(RSCRATCH3 == ECX);
|
||||
MOV(32, R(ECX), rdMapped);
|
||||
AND(32, R(ECX), Imm8(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
ROR_(32, R(RSCRATCH), R(ECX));
|
||||
}
|
||||
else if (constLocalROR32 != 0)
|
||||
ROR_(32, R(RSCRATCH), Imm8(constLocalROR32 << 3));
|
||||
}
|
||||
|
||||
if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
|
||||
}
|
||||
|
||||
if (!(flags & memop_Store) && rd == 15)
|
||||
{
|
||||
if (size < 32)
|
||||
printf("!!! LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr);
|
||||
{
|
||||
if (Num == 1)
|
||||
AND(32, rdMapped, Imm8(0xFE)); // immediate is sign extended
|
||||
Comp_JumpTo(rdMapped.GetSimpleReg());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -475,16 +485,13 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||
|
||||
s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
|
||||
|
||||
u32 cycles = Num
|
||||
? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
|
||||
: (R15 & 0x2 ? 0 : CurInstr.CodeCycles);
|
||||
|
||||
// we need to make sure that the stack stays aligned to 16 bytes
|
||||
u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
|
||||
|
||||
MOV(32, R(ABI_PARAM4), Imm32(cycles));
|
||||
if (!store)
|
||||
{
|
||||
Comp_AddCycles_CDI();
|
||||
|
||||
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
|
||||
SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
|
||||
MOV(64, R(ABI_PARAM2), R(RSP));
|
||||
@ -548,6 +555,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||
}
|
||||
else
|
||||
{
|
||||
Comp_AddCycles_CD();
|
||||
|
||||
if (regsCount & 1)
|
||||
PUSH(RSCRATCH);
|
||||
|
||||
@ -594,81 +603,45 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||
return offset;
|
||||
}
|
||||
|
||||
OpArg Compiler::A_Comp_GetMemWBOffset()
|
||||
|
||||
void Compiler::A_Comp_MemWB()
|
||||
{
|
||||
bool load = CurInstr.Instr & (1 << 20);
|
||||
bool byte = CurInstr.Instr & (1 << 22);
|
||||
int size = byte ? 8 : 32;
|
||||
|
||||
int flags = 0;
|
||||
if (!load)
|
||||
flags |= memop_Store;
|
||||
if (!(CurInstr.Instr & (1 << 24)))
|
||||
flags |= memop_Post;
|
||||
if (CurInstr.Instr & (1 << 21))
|
||||
flags |= memop_Writeback;
|
||||
if (!(CurInstr.Instr & (1 << 23)))
|
||||
flags |= memop_SubtractOffset;
|
||||
|
||||
ComplexOperand offset;
|
||||
if (!(CurInstr.Instr & (1 << 25)))
|
||||
{
|
||||
u32 imm = CurInstr.Instr & 0xFFF;
|
||||
return Imm32(imm);
|
||||
offset = ComplexOperand(CurInstr.Instr & 0xFFF);
|
||||
}
|
||||
else
|
||||
{
|
||||
int op = (CurInstr.Instr >> 5) & 0x3;
|
||||
int amount = (CurInstr.Instr >> 7) & 0x1F;
|
||||
OpArg rm = MapReg(CurInstr.A_Reg(0));
|
||||
bool carryUsed;
|
||||
int rm = CurInstr.A_Reg(0);
|
||||
|
||||
return Comp_RegShiftImm(op, amount, rm, false, carryUsed);
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_MemWB()
|
||||
{
|
||||
OpArg rn = MapReg(CurInstr.A_Reg(16));
|
||||
OpArg rd = MapReg(CurInstr.A_Reg(12));
|
||||
bool load = CurInstr.Instr & (1 << 20);
|
||||
bool byte = CurInstr.Instr & (1 << 22);
|
||||
int size = byte ? 8 : 32;
|
||||
|
||||
if (CurInstr.Instr & (1 << 24))
|
||||
{
|
||||
OpArg offset = A_Comp_GetMemWBOffset();
|
||||
if (CurInstr.Instr & (1 << 23))
|
||||
MOV_sum(32, ABI_PARAM1, rn, offset);
|
||||
else
|
||||
{
|
||||
MOV(32, R(ABI_PARAM1), rn);
|
||||
SUB(32, R(ABI_PARAM1), offset);
|
||||
}
|
||||
|
||||
if (CurInstr.Instr & (1 << 21))
|
||||
MOV(32, rn, R(ABI_PARAM1));
|
||||
}
|
||||
else
|
||||
MOV(32, R(ABI_PARAM1), rn);
|
||||
|
||||
if (!(CurInstr.Instr & (1 << 24)))
|
||||
{
|
||||
OpArg offset = A_Comp_GetMemWBOffset();
|
||||
|
||||
if (CurInstr.Instr & (1 << 23))
|
||||
ADD(32, rn, offset);
|
||||
else
|
||||
SUB(32, rn, offset);
|
||||
offset = ComplexOperand(rm, op, amount);
|
||||
}
|
||||
|
||||
Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
|
||||
if (load && CurInstr.A_Reg(12) == 15)
|
||||
{
|
||||
if (byte)
|
||||
printf("!!! LDRB PC %08X\n", R15);
|
||||
else
|
||||
{
|
||||
if (Num == 1)
|
||||
AND(32, rd, Imm8(0xFE)); // immediate is sign extended
|
||||
Comp_JumpTo(rd.GetSimpleReg());
|
||||
}
|
||||
}
|
||||
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_MemHalf()
|
||||
{
|
||||
OpArg rn = MapReg(CurInstr.A_Reg(16));
|
||||
OpArg rd = MapReg(CurInstr.A_Reg(12));
|
||||
|
||||
OpArg offset = CurInstr.Instr & (1 << 22)
|
||||
? Imm32(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
|
||||
: MapReg(CurInstr.A_Reg(0));
|
||||
ComplexOperand offset = CurInstr.Instr & (1 << 22)
|
||||
? ComplexOperand(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
|
||||
: ComplexOperand(CurInstr.A_Reg(0), 0, 0);
|
||||
|
||||
int op = (CurInstr.Instr >> 5) & 0x3;
|
||||
bool load = CurInstr.Instr & (1 << 20);
|
||||
@ -689,49 +662,29 @@ void Compiler::A_Comp_MemHalf()
|
||||
if (size == 32 && Num == 1)
|
||||
return; // NOP
|
||||
|
||||
if (CurInstr.Instr & (1 << 24))
|
||||
{
|
||||
if (CurInstr.Instr & (1 << 23))
|
||||
MOV_sum(32, ABI_PARAM1, rn, offset);
|
||||
else
|
||||
{
|
||||
MOV(32, R(ABI_PARAM1), rn);
|
||||
SUB(32, R(ABI_PARAM1), offset);
|
||||
}
|
||||
|
||||
if (CurInstr.Instr & (1 << 21))
|
||||
MOV(32, rn, R(ABI_PARAM1));
|
||||
}
|
||||
else
|
||||
MOV(32, R(ABI_PARAM1), rn);
|
||||
|
||||
int flags = 0;
|
||||
if (signExtend)
|
||||
flags |= memop_SignExtend;
|
||||
if (!load)
|
||||
flags |= memop_Store;
|
||||
if (!(CurInstr.Instr & (1 << 24)))
|
||||
{
|
||||
if (CurInstr.Instr & (1 << 23))
|
||||
ADD(32, rn, offset);
|
||||
else
|
||||
SUB(32, rn, offset);
|
||||
}
|
||||
flags |= memop_Post;
|
||||
if (!(CurInstr.Instr & (1 << 23)))
|
||||
flags |= memop_SubtractOffset;
|
||||
if (CurInstr.Instr & (1 << 21))
|
||||
flags |= memop_Writeback;
|
||||
|
||||
Comp_MemAccess(rd, signExtend, !load, size);
|
||||
|
||||
if (load && CurInstr.A_Reg(12) == 15)
|
||||
printf("!!! MemHalf op PC %08X\n", R15);;
|
||||
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemReg()
|
||||
{
|
||||
OpArg rd = MapReg(CurInstr.T_Reg(0));
|
||||
OpArg rb = MapReg(CurInstr.T_Reg(3));
|
||||
OpArg ro = MapReg(CurInstr.T_Reg(6));
|
||||
|
||||
int op = (CurInstr.Instr >> 10) & 0x3;
|
||||
bool load = op & 0x2;
|
||||
bool byte = op & 0x1;
|
||||
|
||||
MOV_sum(32, ABI_PARAM1, rb, ro);
|
||||
|
||||
Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(CurInstr.T_Reg(6), 0, 0),
|
||||
byte ? 8 : 32, load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_LDM_STM()
|
||||
@ -758,67 +711,55 @@ void Compiler::A_Comp_LDM_STM()
|
||||
|
||||
void Compiler::T_Comp_MemImm()
|
||||
{
|
||||
OpArg rd = MapReg(CurInstr.T_Reg(0));
|
||||
OpArg rb = MapReg(CurInstr.T_Reg(3));
|
||||
|
||||
int op = (CurInstr.Instr >> 11) & 0x3;
|
||||
bool load = op & 0x1;
|
||||
bool byte = op & 0x2;
|
||||
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
|
||||
|
||||
LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
|
||||
|
||||
Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(offset),
|
||||
byte ? 8 : 32, load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemRegHalf()
|
||||
{
|
||||
OpArg rd = MapReg(CurInstr.T_Reg(0));
|
||||
OpArg rb = MapReg(CurInstr.T_Reg(3));
|
||||
OpArg ro = MapReg(CurInstr.T_Reg(6));
|
||||
|
||||
int op = (CurInstr.Instr >> 10) & 0x3;
|
||||
bool load = op != 0;
|
||||
int size = op != 1 ? 16 : 8;
|
||||
bool signExtend = op & 1;
|
||||
|
||||
MOV_sum(32, ABI_PARAM1, rb, ro);
|
||||
int flags = 0;
|
||||
if (signExtend)
|
||||
flags |= memop_SignExtend;
|
||||
if (!load)
|
||||
flags |= memop_Store;
|
||||
|
||||
Comp_MemAccess(rd, signExtend, !load, size);
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(CurInstr.T_Reg(6), 0, 0),
|
||||
size, flags);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemImmHalf()
|
||||
{
|
||||
OpArg rd = MapReg(CurInstr.T_Reg(0));
|
||||
OpArg rb = MapReg(CurInstr.T_Reg(3));
|
||||
|
||||
u32 offset = (CurInstr.Instr >> 5) & 0x3E;
|
||||
bool load = CurInstr.Instr & (1 << 11);
|
||||
|
||||
LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
|
||||
|
||||
Comp_MemAccess(rd, false, !load, 16);
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(offset), 16,
|
||||
load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_LoadPCRel()
|
||||
{
|
||||
OpArg rd = MapReg(CurInstr.T_Reg(8));
|
||||
u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
|
||||
|
||||
// hopefully this doesn't break
|
||||
u32 val; CurCPU->DataRead32(addr, &val);
|
||||
MOV(32, rd, Imm32(val));
|
||||
Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemSPRel()
|
||||
{
|
||||
u32 offset = (CurInstr.Instr & 0xFF) * 4;
|
||||
OpArg rd = MapReg(CurInstr.T_Reg(8));
|
||||
bool load = CurInstr.Instr & (1 << 11);
|
||||
|
||||
LEA(32, ABI_PARAM1, MDisp(MapReg(13).GetSimpleReg(), offset));
|
||||
|
||||
Comp_MemAccess(rd, false, !load, 32);
|
||||
Comp_MemAccess(CurInstr.T_Reg(8), 13, ComplexOperand(offset), 32,
|
||||
load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_PUSH_POP()
|
||||
|
Reference in New Issue
Block a user