Multiinstance jit (#2201)

* works on Linux x64
still needs to be fixed for everything else

* use lots of PROT_NONE memory to reliably reserve virtual address space

* multi instance fastmem on Linux

* Windows

* blarg

* disable fastmem if the page size is not 4 KiB

* fix fastmem dialog option

* make aarch64 work as well

* fastmem: support 16 KiB pages
This commit is contained in:
Kemal Afzal
2024-11-18 20:43:05 +01:00
committed by GitHub
parent cb7af652f5
commit 99ce959913
19 changed files with 573 additions and 265 deletions

View File

@ -176,9 +176,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
else
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
if (Num == 0)
CALL((void*)&ARMv5JumpToTrampoline);
ABI_CallFunction(ARMv5JumpToTrampoline);
else
CALL((void*)&ARMv4JumpToTrampoline);
ABI_CallFunction(ARMv4JumpToTrampoline);
PopRegs(restoreCPSR, true);

View File

@ -21,19 +21,13 @@
#include "../ARMJIT.h"
#include "../ARMInterpreter.h"
#include "../NDS.h"
#include "../ARMJIT_Global.h"
#include <assert.h>
#include <stdarg.h>
#include "../dolphin/CommonFuncs.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
using namespace Gen;
using namespace Common;
@ -222,46 +216,21 @@ void Compiler::A_Comp_MSR()
MOV(32, R(ABI_PARAM3), R(RCPSR));
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((void*)&UpdateModeTrampoline);
ABI_CallFunction(UpdateModeTrampoline);
PopRegs(true, true);
}
}
}
/*
We'll repurpose this .bss memory
*/
u8 CodeMemory[1024 * 1024 * 32];
Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
{
{
#ifdef _WIN32
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
ARMJIT_Global::Init();
u64 pageSize = (u64)sysInfo.dwPageSize;
#else
u64 pageSize = sysconf(_SC_PAGE_SIZE);
#endif
CodeMemBase = static_cast<u8*>(ARMJIT_Global::AllocateCodeMem());
CodeMemSize = ARMJIT_Global::CodeMemorySliceSize;
u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize);
u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned;
#ifdef _WIN32
DWORD dummy;
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
#elif defined(__APPLE__)
pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0);
#else
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
ResetStart = pageAligned;
CodeMemSize = alignedSize;
}
ResetStart = CodeMemBase;
Reset();
@ -475,6 +444,13 @@ Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
FarSize = (ResetStart + CodeMemSize) - FarStart;
}
// Destructor: undoes the constructor's setup in reverse order. The constructor
// calls ARMJIT_Global::Init() and then stores the result of
// ARMJIT_Global::AllocateCodeMem() in CodeMemBase; here that memory is handed
// back first and ARMJIT_Global::DeInit() is called afterwards.
Compiler::~Compiler()
{
// Return the code memory this instance obtained in its constructor.
ARMJIT_Global::FreeCodeMem(CodeMemBase);
// Pairs with the constructor's ARMJIT_Global::Init().
// NOTE(review): presumably Init()/DeInit() are counted per instance so shared
// state survives while other Compiler objects are alive — confirm in ARMJIT_Global.
ARMJIT_Global::DeInit();
}
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
@ -684,7 +660,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
if (ConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
JMP((u8*)&ARM_Ret, true);
ABI_TailCall(ARM_Ret);
}
}
@ -846,7 +822,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
if (ConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
JMP((u8*)ARM_Ret, true);
ABI_TailCall(ARM_Ret);
#ifdef JIT_PROFILING_ENABLED
CreateMethod("JIT_Block_%d_%d_%08X", (void*)res, Num, Thumb, instrs[0].Addr);

View File

@ -84,6 +84,7 @@ class Compiler : public Gen::XEmitter
{
public:
explicit Compiler(melonDS::NDS& nds);
~Compiler();
void Reset();
@ -256,6 +257,7 @@ public:
std::unordered_map<u8*, LoadStorePatch> LoadStorePatches {};
u8* CodeMemBase;
u8* ResetStart {};
u32 CodeMemSize {};

View File

@ -316,24 +316,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
{
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowWrite9<u32, 0>); break;
case 16: CALL((void*)&SlowWrite9<u16, 0>); break;
case 8: CALL((void*)&SlowWrite9<u8, 0>); break;
case 33: CALL((void*)&SlowWrite9<u32, 1>); break;
case 17: CALL((void*)&SlowWrite9<u16, 1>); break;
case 9: CALL((void*)&SlowWrite9<u8, 1>); break;
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
case 8: ABI_CallFunction(&SlowWrite9<u8, 0>); break;
case 33: ABI_CallFunction(&SlowWrite9<u32, 1>); break;
case 17: ABI_CallFunction(&SlowWrite9<u16, 1>); break;
case 9: ABI_CallFunction(&SlowWrite9<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowRead9<u32, 0>); break;
case 16: CALL((void*)&SlowRead9<u16, 0>); break;
case 8: CALL((void*)&SlowRead9<u8, 0>); break;
case 33: CALL((void*)&SlowRead9<u32, 1>); break;
case 17: CALL((void*)&SlowRead9<u16, 1>); break;
case 9: CALL((void*)&SlowRead9<u8, 1>); break;
case 32: ABI_CallFunction(&SlowRead9<u32, 0>); break;
case 16: ABI_CallFunction(&SlowRead9<u16, 0>); break;
case 8: ABI_CallFunction(&SlowRead9<u8, 0>); break;
case 33: ABI_CallFunction(&SlowRead9<u32, 1>); break;
case 17: ABI_CallFunction(&SlowRead9<u16, 1>); break;
case 9: ABI_CallFunction(&SlowRead9<u8, 1>); break;
}
}
}
@ -347,24 +347,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowWrite7<u32, 0>); break;
case 16: CALL((void*)&SlowWrite7<u16, 0>); break;
case 8: CALL((void*)&SlowWrite7<u8, 0>); break;
case 33: CALL((void*)&SlowWrite7<u32, 1>); break;
case 17: CALL((void*)&SlowWrite7<u16, 1>); break;
case 9: CALL((void*)&SlowWrite7<u8, 1>); break;
case 32: ABI_CallFunction(&SlowWrite7<u32, 0>); break;
case 16: ABI_CallFunction(&SlowWrite7<u16, 0>); break;
case 8: ABI_CallFunction(&SlowWrite7<u8, 0>); break;
case 33: ABI_CallFunction(&SlowWrite7<u32, 1>); break;
case 17: ABI_CallFunction(&SlowWrite7<u16, 1>); break;
case 9: ABI_CallFunction(&SlowWrite7<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowRead7<u32, 0>); break;
case 16: CALL((void*)&SlowRead7<u16, 0>); break;
case 8: CALL((void*)&SlowRead7<u8, 0>); break;
case 33: CALL((void*)&SlowRead7<u32, 1>); break;
case 17: CALL((void*)&SlowRead7<u16, 1>); break;
case 9: CALL((void*)&SlowRead7<u8, 1>); break;
case 32: ABI_CallFunction(&SlowRead7<u32, 0>); break;
case 16: ABI_CallFunction(&SlowRead7<u16, 0>); break;
case 8: ABI_CallFunction(&SlowRead7<u8, 0>); break;
case 33: ABI_CallFunction(&SlowRead7<u32, 1>); break;
case 17: ABI_CallFunction(&SlowRead7<u16, 1>); break;
case 9: ABI_CallFunction(&SlowRead7<u8, 1>); break;
}
}
}
@ -526,10 +526,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
switch (Num * 2 | NDS.ConsoleType)
{
case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break;
case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break;
case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break;
case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
case 0: ABI_CallFunction(&SlowBlockTransfer9<false, 0>); break;
case 1: ABI_CallFunction(&SlowBlockTransfer9<false, 1>); break;
case 2: ABI_CallFunction(&SlowBlockTransfer7<false, 0>); break;
case 3: ABI_CallFunction(&SlowBlockTransfer7<false, 1>); break;
}
PopRegs(false, false);
@ -630,10 +630,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
switch (Num * 2 | NDS.ConsoleType)
{
case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break;
case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break;
case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break;
case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break;
case 0: ABI_CallFunction(&SlowBlockTransfer9<true, 0>); break;
case 1: ABI_CallFunction(&SlowBlockTransfer9<true, 1>); break;
case 2: ABI_CallFunction(&SlowBlockTransfer7<true, 0>); break;
case 3: ABI_CallFunction(&SlowBlockTransfer7<true, 1>); break;
}
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));