mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2025-07-29 00:59:56 -06:00
Implement block linking, plus some refactoring.
Block linking is currently only supported on x64.
This commit is contained in:
@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
|
||||
ConstantCycles += cycles;
|
||||
else
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
}
|
||||
|
||||
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
||||
@ -135,7 +135,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
||||
IrregularCycles = true;
|
||||
|
||||
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
|
||||
bool previouslyDirty = CPSRDirty;
|
||||
bool cpsrDirty = CPSRDirty;
|
||||
SaveCPSR();
|
||||
|
||||
if (restoreCPSR)
|
||||
@ -168,9 +168,10 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
||||
LoadReg(reg, RegCache.Mapping[reg]);
|
||||
}
|
||||
|
||||
if (previouslyDirty)
|
||||
LoadCPSR();
|
||||
CPSRDirty = previouslyDirty;
|
||||
LoadCPSR();
|
||||
// in case this instruction is skipped
|
||||
if (CurInstr.Cond() < 0xE)
|
||||
CPSRDirty = cpsrDirty;
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_BranchImm()
|
||||
@ -209,20 +210,12 @@ void Compiler::T_Comp_BCOND()
|
||||
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
|
||||
Comp_JumpTo(R15 + offset + 1, true);
|
||||
|
||||
Comp_SpecialBranchBehaviour();
|
||||
Comp_SpecialBranchBehaviour(true);
|
||||
|
||||
FixupBranch skipFailed = J();
|
||||
SetJumpTarget(skipExecute);
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondTaken)
|
||||
{
|
||||
RegCache.PrepareExit();
|
||||
SaveCPSR(false);
|
||||
|
||||
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
}
|
||||
Comp_SpecialBranchBehaviour(false);
|
||||
|
||||
Comp_AddCycles_C(true);
|
||||
SetJumpTarget(skipFailed);
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "ARMJIT_Compiler.h"
|
||||
|
||||
#include "../ARMInterpreter.h"
|
||||
#include "../Config.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
@ -15,6 +16,8 @@
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
extern "C" void ARM_Ret();
|
||||
|
||||
namespace ARMJIT
|
||||
{
|
||||
template <>
|
||||
@ -170,6 +173,24 @@ Compiler::Compiler()
|
||||
RET();
|
||||
}
|
||||
|
||||
{
|
||||
CPSRDirty = true;
|
||||
BranchStub[0] = GetWritableCodePtr();
|
||||
SaveCPSR();
|
||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||
CALL((u8*)ARMJIT::LinkBlock<0>);
|
||||
LoadCPSR();
|
||||
JMP((u8*)ARM_Ret, true);
|
||||
|
||||
CPSRDirty = true;
|
||||
BranchStub[1] = GetWritableCodePtr();
|
||||
SaveCPSR();
|
||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||
CALL((u8*)ARMJIT::LinkBlock<1>);
|
||||
LoadCPSR();
|
||||
JMP((u8*)ARM_Ret, true);
|
||||
}
|
||||
|
||||
// move the region forward to prevent overwriting the generated functions
|
||||
CodeMemSize -= GetWritableCodePtr() - ResetStart;
|
||||
ResetStart = GetWritableCodePtr();
|
||||
@ -362,23 +383,43 @@ void Compiler::Reset()
|
||||
SetCodePtr(ResetStart);
|
||||
}
|
||||
|
||||
void Compiler::Comp_SpecialBranchBehaviour()
|
||||
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
|
||||
{
|
||||
if (CurInstr.BranchFlags & branch_IdleBranch)
|
||||
OR(32, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
|
||||
if (taken && CurInstr.BranchFlags & branch_IdleBranch)
|
||||
OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
|
||||
if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
|
||||
|| (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
|
||||
{
|
||||
RegCache.PrepareExit();
|
||||
SaveCPSR(false);
|
||||
|
||||
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
|
||||
|
||||
if (Config::JIT_BrancheOptimisations == 2 && !(CurInstr.BranchFlags & branch_IdleBranch)
|
||||
&& (!taken || (CurInstr.BranchFlags & branch_StaticTarget)))
|
||||
{
|
||||
FixupBranch ret = J_CC(CC_S);
|
||||
CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
|
||||
FixupBranch ret2 = J_CC(CC_NZ);
|
||||
|
||||
u8* rewritePart = GetWritableCodePtr();
|
||||
NOP(5);
|
||||
|
||||
MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
|
||||
JMP((u8*)BranchStub[Num], true);
|
||||
|
||||
SetJumpTarget(ret);
|
||||
SetJumpTarget(ret2);
|
||||
JMP((u8*)ARM_Ret, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
JMP((u8*)&ARM_Ret, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
|
||||
JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
|
||||
{
|
||||
if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
|
||||
ResetBlockCache();
|
||||
@ -388,15 +429,11 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||
Num = cpu->Num;
|
||||
CodeRegion = instrs[0].Addr >> 24;
|
||||
CurCPU = cpu;
|
||||
// CPSR might have been modified in a previous block
|
||||
CPSRDirty = Config::JIT_BrancheOptimisations == 2;
|
||||
|
||||
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
|
||||
MOV(64, R(RCPU), ImmPtr(cpu));
|
||||
|
||||
LoadCPSR();
|
||||
|
||||
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
|
||||
|
||||
for (int i = 0; i < instrsCount; i++)
|
||||
@ -474,7 +511,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||
else
|
||||
(this->*comp)();
|
||||
|
||||
Comp_SpecialBranchBehaviour();
|
||||
Comp_SpecialBranchBehaviour(true);
|
||||
|
||||
if (CurInstr.Cond() < 0xE)
|
||||
{
|
||||
@ -485,15 +522,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||
|
||||
Comp_AddCycles_C(true);
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondTaken)
|
||||
{
|
||||
RegCache.PrepareExit();
|
||||
SaveCPSR(false);
|
||||
|
||||
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
}
|
||||
Comp_SpecialBranchBehaviour(false);
|
||||
|
||||
SetJumpTarget(skipFailed);
|
||||
}
|
||||
@ -504,17 +533,38 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||
}
|
||||
}
|
||||
|
||||
if (comp == NULL && i != instrsCount - 1)
|
||||
if (comp == NULL)
|
||||
LoadCPSR();
|
||||
}
|
||||
|
||||
RegCache.Flush();
|
||||
SaveCPSR();
|
||||
|
||||
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
|
||||
RET();
|
||||
if (Config::JIT_BrancheOptimisations == 2
|
||||
&& !(instrs[instrsCount - 1].BranchFlags & branch_IdleBranch)
|
||||
&& (!instrs[instrsCount - 1].Info.Branches()
|
||||
|| instrs[instrsCount - 1].BranchFlags & branch_FollowCondNotTaken
|
||||
|| (instrs[instrsCount - 1].BranchFlags & branch_FollowCondTaken && instrs[instrsCount - 1].BranchFlags & branch_StaticTarget)))
|
||||
{
|
||||
FixupBranch ret = J_CC(CC_S);
|
||||
CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
|
||||
FixupBranch ret2 = J_CC(CC_NZ);
|
||||
|
||||
u8* rewritePart = GetWritableCodePtr();
|
||||
NOP(5);
|
||||
|
||||
MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
|
||||
JMP((u8*)BranchStub[Num], true);
|
||||
|
||||
SetJumpTarget(ret);
|
||||
SetJumpTarget(ret2);
|
||||
JMP((u8*)ARM_Ret, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
JMP((u8*)ARM_Ret, true);
|
||||
}
|
||||
|
||||
/*FILE* codeout = fopen("codeout", "a");
|
||||
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
|
||||
@ -525,6 +575,22 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||
return res;
|
||||
}
|
||||
|
||||
// Overwrites the code located `offset` bytes past ResetStart with a jump to
// `entry`, then puts the emitter's write cursor back where it was.
//
// offset: byte offset of the patch site, relative to ResetStart.
// entry:  destination of the emitted jump.
//
// NOTE(review): presumably the `true` argument forces a fixed-size 5-byte
// encoding so the jump fits the NOP(5) placeholder emitted at link sites -
// confirm against the emitter's JMP declaration.
void Compiler::LinkBlock(u32 offset, JitBlockEntry entry)
{
    // remember where regular code generation has to continue afterwards
    u8* const resumeAt = GetWritableCodePtr();
    u8* const patchSite = ResetStart + offset;

    SetCodePtr(patchSite);
    JMP((u8*)entry, true);

    SetCodePtr(resumeAt);
}
|
||||
|
||||
// Overwrites the code located `offset` bytes past ResetStart with NOP(5),
// then puts the emitter's write cursor back where it was.
//
// offset: byte offset of the patch site, relative to ResetStart.
void Compiler::UnlinkBlock(u32 offset)
{
    // remember where regular code generation has to continue afterwards
    u8* const resumeAt = GetWritableCodePtr();
    u8* const patchSite = ResetStart + offset;

    SetCodePtr(patchSite);
    NOP(5);

    SetCodePtr(resumeAt);
}
|
||||
|
||||
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
|
||||
{
|
||||
s32 cycles = Num ?
|
||||
@ -532,7 +598,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant)
|
||||
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
|
||||
|
||||
if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
@ -544,7 +610,7 @@ void Compiler::Comp_AddCycles_CI(u32 i)
|
||||
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
|
||||
|
||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
@ -558,12 +624,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
|
||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
||||
{
|
||||
LEA(32, RSCRATCH, MDisp(i, add + cycles));
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
ConstantCycles += i + cycles;
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
|
||||
}
|
||||
}
|
||||
|
||||
@ -599,7 +665,7 @@ void Compiler::Comp_AddCycles_CDI()
|
||||
}
|
||||
|
||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
@ -643,7 +709,7 @@ void Compiler::Comp_AddCycles_CD()
|
||||
}
|
||||
|
||||
if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
|
@ -51,7 +51,10 @@ public:
|
||||
|
||||
void Reset();
|
||||
|
||||
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
|
||||
void LinkBlock(u32 offset, JitBlockEntry entry);
|
||||
void UnlinkBlock(u32 offset);
|
||||
|
||||
JitBlockEntry CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
|
||||
|
||||
void LoadReg(int reg, Gen::X64Reg nativeReg);
|
||||
void SaveReg(int reg, Gen::X64Reg nativeReg);
|
||||
@ -145,7 +148,7 @@ public:
|
||||
|
||||
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
|
||||
|
||||
void Comp_SpecialBranchBehaviour();
|
||||
void Comp_SpecialBranchBehaviour(bool taken);
|
||||
|
||||
void* Gen_MemoryRoutine9(bool store, int size);
|
||||
|
||||
@ -176,12 +179,24 @@ public:
|
||||
return Gen::R(RegCache.Mapping[reg]);
|
||||
}
|
||||
|
||||
// Turns a byte offset relative to ResetStart into a block entry pointer.
JitBlockEntry AddEntryOffset(u32 offset)
{
    auto* const entryAddr = ResetStart + offset;
    return (JitBlockEntry)entryAddr;
}
|
||||
|
||||
// Turns a block entry pointer into its byte offset relative to ResetStart.
u32 SubEntryOffset(JitBlockEntry entry)
{
    u8* const entryAddr = (u8*)entry;
    return entryAddr - ResetStart;
}
|
||||
|
||||
u8* ResetStart;
|
||||
u32 CodeMemSize;
|
||||
|
||||
bool Exit;
|
||||
bool IrregularCycles;
|
||||
|
||||
void* BranchStub[2];
|
||||
|
||||
void* MemoryFuncs9[3][2];
|
||||
void* MemoryFuncs7[3][2];
|
||||
|
||||
|
15
src/ARMJIT_x64/ARMJIT_GenOffsets.cpp
Normal file
15
src/ARMJIT_x64/ARMJIT_GenOffsets.cpp
Normal file
@ -0,0 +1,15 @@
|
||||
#include "../ARM.h"
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
FILE* f = fopen("ARMJIT_Offsets.h", "w");
|
||||
#define writeOffset(field) \
|
||||
fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field))
|
||||
|
||||
writeOffset(CPSR);
|
||||
writeOffset(Cycles);
|
||||
writeOffset(StopExecution);
|
||||
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
74
src/ARMJIT_x64/ARMJIT_Linkage.s
Normal file
74
src/ARMJIT_x64/ARMJIT_Linkage.s
Normal file
@ -0,0 +1,74 @@
|
||||
.intel_syntax noprefix
|
||||
|
||||
#include "ARMJIT_Offsets.h"
|
||||
|
||||
.text
|
||||
|
||||
#define RCPU rbp
|
||||
#define RCPSR r15d
|
||||
|
||||
#ifdef WIN64
|
||||
#define ARG1_REG ecx
|
||||
#define ARG2_REG edx
|
||||
#define ARG3_REG r8d
|
||||
#define ARG4_REG r9d
|
||||
#define ARG1_REG64 rcx
|
||||
#define ARG2_REG64 rdx
|
||||
#define ARG3_REG64 r8
|
||||
#define ARG4_REG64 r9
|
||||
#else
|
||||
#define ARG1_REG edi
|
||||
#define ARG2_REG esi
|
||||
#define ARG3_REG edx
|
||||
#define ARG4_REG ecx
|
||||
#define ARG1_REG64 rdi
|
||||
#define ARG2_REG64 rsi
|
||||
#define ARG3_REG64 rdx
|
||||
#define ARG4_REG64 rcx
|
||||
#endif
|
||||
|
||||
.p2align 4,,15
|
||||
|
||||
/*
 * ARM_Dispatch: entry trampoline.
 *   first argument  (ARG1_REG64): loaded into RCPU (rbp)
 *   second argument (ARG2_REG64): address that is jumped to at the end
 * Saves rbx, r12-r15 and rbp (plus rdi and rsi on WIN64) on the stack, loads
 * RCPSR (r15d) from [RCPU + ARM_CPSR_offset] and then jumps (not calls) to
 * the second argument, so the pushed registers stay on the stack.
 * ARM_Ret in this file pops the same registers in reverse order.
 */
.global ARM_Dispatch
|
||||
ARM_Dispatch:
|
||||
#ifdef WIN64
|
||||
push rdi
|
||||
push rsi
|
||||
#endif
|
||||
push rbx
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rbp
|
||||
|
||||
/* NOTE(review): 0x28 is presumably 0x20 bytes of shadow space plus 8 bytes of alignment padding - confirm */
#ifdef WIN64
|
||||
sub rsp, 0x28
|
||||
#endif
|
||||
mov RCPU, ARG1_REG64
|
||||
mov RCPSR, [RCPU + ARM_CPSR_offset]
|
||||
|
||||
/* tail-jump to the address passed as second argument */
jmp ARG2_REG64
|
||||
|
||||
.p2align 4,,15
|
||||
|
||||
/*
 * ARM_Ret: exit trampoline, the counterpart of ARM_Dispatch.
 * Stores RCPSR (r15d) to [RCPU + ARM_CPSR_offset], releases the 0x28 bytes
 * reserved on WIN64, pops rbp, r15-r12 and rbx (plus rsi and rdi on WIN64),
 * i.e. the reverse of the push order used by ARM_Dispatch, and returns.
 * It is reached with a jump, so the stack must be exactly as ARM_Dispatch
 * left it.
 */
.global ARM_Ret
|
||||
ARM_Ret:
|
||||
/* write the cached CPSR back before RCPU (rbp) and r15 are overwritten by the pops below */
mov [RCPU + ARM_CPSR_offset], RCPSR
|
||||
|
||||
#ifdef WIN64
|
||||
add rsp, 0x28
|
||||
#endif
|
||||
|
||||
pop rbp
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbx
|
||||
#ifdef WIN64
|
||||
pop rsi
|
||||
pop rdi
|
||||
#endif
|
||||
|
||||
ret
|
3
src/ARMJIT_x64/ARMJIT_Offsets.h
Normal file
3
src/ARMJIT_x64/ARMJIT_Offsets.h
Normal file
@ -0,0 +1,3 @@
|
||||
#define ARM_CPSR_offset 0x64
|
||||
#define ARM_Cycles_offset 0xc
|
||||
#define ARM_StopExecution_offset 0x10
|
Reference in New Issue
Block a user