Merge pull request #10055 from JosJuice/jitarm64-reuse-memory

JitArm64: Codegen space reuse
This commit is contained in:
JMC47 2021-11-20 17:35:24 -05:00 committed by GitHub
commit e5a4a86672
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 283 additions and 110 deletions

View File

@ -71,14 +71,16 @@ std::optional<u8> FPImm8FromFloat(float value)
}
} // Anonymous namespace
// Points the emitter at a new write position (ptr) and region end (end), and sets the
// write-failed flag to write_failed. Unlike SetCodePtr(), m_lastCacheFlushEnd is left untouched.
// NOTE(review): a one-argument and a three-argument signature appear back to back here;
// presumably the first is the pre-change line of the diff — confirm.
void ARM64XEmitter::SetCodePtrUnsafe(u8* ptr)
void ARM64XEmitter::SetCodePtrUnsafe(u8* ptr, u8* end, bool write_failed)
{
m_code = ptr;
m_code_end = end;
m_write_failed = write_failed;
}
// Repositions the emitter through SetCodePtrUnsafe() and additionally resets
// m_lastCacheFlushEnd to the new write position.
// NOTE(review): both a one-argument and a three-argument SetCodePtrUnsafe() call are shown;
// presumably the former is the pre-change line of the diff — confirm.
void ARM64XEmitter::SetCodePtr(u8* ptr, u8* end, bool write_failed)
{
SetCodePtrUnsafe(ptr);
SetCodePtrUnsafe(ptr, end, write_failed);
m_lastCacheFlushEnd = ptr;
}
@ -92,6 +94,16 @@ u8* ARM64XEmitter::GetWritableCodePtr()
return m_code;
}
// Returns m_code_end as a read-only pointer. This is the bound Write32() compares against
// before emitting.
const u8* ARM64XEmitter::GetCodeEnd() const
{
return m_code_end;
}
// Returns m_code_end as a mutable pointer, for callers that need to hand the bound to another
// emitter or to a range container.
u8* ARM64XEmitter::GetWritableCodeEnd()
{
return m_code_end;
}
void ARM64XEmitter::ReserveCodeSpace(u32 bytes)
{
for (u32 i = 0; i < bytes / 4; i++)
@ -116,6 +128,13 @@ u8* ARM64XEmitter::AlignCodePage()
// Emits one 32-bit word at the current write position and advances past it.
// If the word would not fit before m_code_end, nothing is written: the write position is moved
// to m_code_end and the write-failed flag is raised instead.
void ARM64XEmitter::Write32(u32 value)
{
  constexpr auto word_size = sizeof(u32);

  const bool fits = m_code + word_size <= m_code_end;
  if (!fits)
  {
    m_write_failed = true;
    m_code = m_code_end;
    return;
  }

  std::memcpy(m_code, &value, word_size);
  m_code += word_size;
}
@ -659,6 +678,9 @@ static constexpr u32 MaskImm26(s64 distance)
// FixupBranch branching
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
{
if (!branch.ptr)
return;
bool Not = false;
u32 inst = 0;
s64 distance = (s64)(m_code - branch.ptr);
@ -709,67 +731,68 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
std::memcpy(branch.ptr, &inst, sizeof(inst));
}
FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt)
// Emits a BRK(0) placeholder at the current write position and returns a FixupBranch whose ptr
// refers to that placeholder, ready for the caller to fill in the branch type and operands.
FixupBranch ARM64XEmitter::WriteFixupBranch()
{
  u8* const placeholder = m_code;
  BRK(0);

  FixupBranch branch{};
  // When the write-failed flag is set the placeholder may not have been emitted, so hand back a
  // null address. SetJumpTarget() returns early on a null ptr and therefore never patches
  // memory outside the code region.
  branch.ptr = HasWriteFailed() ? nullptr : placeholder;
  return branch;
}
// Reserves a branch of type CBZ on register Rt and returns the FixupBranch describing it, to be
// resolved later by SetJumpTarget().
// NOTE(review): WriteFixupBranch() already emits a placeholder, so the trailing NOP() looks like
// a pre-change line of the diff — confirm.
FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt)
{
FixupBranch branch = WriteFixupBranch();
branch.type = FixupBranch::Type::CBZ;
branch.reg = Rt;
NOP();
return branch;
}
// Reserves a branch of type CBNZ on register Rt and returns the FixupBranch describing it, to be
// resolved later by SetJumpTarget().
// NOTE(review): 'branch' is declared twice and NOP() follows the WriteFixupBranch() placeholder;
// presumably the 'branch{}' / 'branch.ptr' / 'NOP()' lines are pre-change diff lines — confirm.
FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt)
{
FixupBranch branch{};
branch.ptr = m_code;
FixupBranch branch = WriteFixupBranch();
branch.type = FixupBranch::Type::CBNZ;
branch.reg = Rt;
NOP();
return branch;
}
// Reserves a branch of type BConditional with condition 'cond' and returns the FixupBranch
// describing it, to be resolved later by SetJumpTarget().
// NOTE(review): 'branch' is declared twice and NOP() follows the WriteFixupBranch() placeholder;
// presumably the 'branch{}' / 'branch.ptr' / 'NOP()' lines are pre-change diff lines — confirm.
FixupBranch ARM64XEmitter::B(CCFlags cond)
{
FixupBranch branch{};
branch.ptr = m_code;
FixupBranch branch = WriteFixupBranch();
branch.type = FixupBranch::Type::BConditional;
branch.cond = cond;
NOP();
return branch;
}
// Reserves a branch of type TBZ that tests bit 'bit' of register Rt and returns the FixupBranch
// describing it, to be resolved later by SetJumpTarget().
// NOTE(review): 'branch' is declared twice and NOP() follows the WriteFixupBranch() placeholder;
// presumably the 'branch{}' / 'branch.ptr' / 'NOP()' lines are pre-change diff lines — confirm.
FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit)
{
FixupBranch branch{};
branch.ptr = m_code;
FixupBranch branch = WriteFixupBranch();
branch.type = FixupBranch::Type::TBZ;
branch.reg = Rt;
branch.bit = bit;
NOP();
return branch;
}
// Reserves a branch of type TBNZ that tests bit 'bit' of register Rt and returns the FixupBranch
// describing it, to be resolved later by SetJumpTarget().
// NOTE(review): 'branch' is declared twice and NOP() follows the WriteFixupBranch() placeholder;
// presumably the 'branch{}' / 'branch.ptr' / 'NOP()' lines are pre-change diff lines — confirm.
FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit)
{
FixupBranch branch{};
branch.ptr = m_code;
FixupBranch branch = WriteFixupBranch();
branch.type = FixupBranch::Type::TBNZ;
branch.reg = Rt;
branch.bit = bit;
NOP();
return branch;
}
// Reserves an unconditional branch (type B) and returns the FixupBranch describing it, to be
// resolved later by SetJumpTarget().
// NOTE(review): 'branch' is declared twice and NOP() follows the WriteFixupBranch() placeholder;
// presumably the 'branch{}' / 'branch.ptr' / 'NOP()' lines are pre-change diff lines — confirm.
FixupBranch ARM64XEmitter::B()
{
FixupBranch branch{};
branch.ptr = m_code;
FixupBranch branch = WriteFixupBranch();
branch.type = FixupBranch::Type::B;
NOP();
return branch;
}
// Reserves a branch of type BL and returns the FixupBranch describing it, to be resolved later
// by SetJumpTarget().
// NOTE(review): 'branch' is declared twice and NOP() follows the WriteFixupBranch() placeholder;
// presumably the 'branch{}' / 'branch.ptr' / 'NOP()' lines are pre-change diff lines — confirm.
FixupBranch ARM64XEmitter::BL()
{
FixupBranch branch{};
branch.ptr = m_code;
FixupBranch branch = WriteFixupBranch();
branch.type = FixupBranch::Type::BL;
NOP();
return branch;
}
@ -1945,12 +1968,12 @@ bool ARM64XEmitter::MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2)
MOVI2R(Rd, imm1);
int size1 = GetCodePtr() - start_pointer;
SetCodePtrUnsafe(start_pointer);
m_code = start_pointer;
MOVI2R(Rd, imm2);
int size2 = GetCodePtr() - start_pointer;
SetCodePtrUnsafe(start_pointer);
m_code = start_pointer;
bool element = size1 > size2;

View File

@ -725,8 +725,18 @@ class ARM64XEmitter
friend class ARM64FloatEmitter;
private:
u8* m_code;
u8* m_lastCacheFlushEnd;
// Pointer to memory where code will be emitted to.
u8* m_code = nullptr;
// Pointer past the end of the memory region we're allowed to emit to.
// Writes that would reach this memory are refused and will set the m_write_failed flag instead.
u8* m_code_end = nullptr;
u8* m_lastCacheFlushEnd = nullptr;
// Set to true when a write request happens that would write past m_code_end.
// Must be cleared with SetCodePtr() afterwards.
bool m_write_failed = false;
void AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative, bool flags);
void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
@ -760,6 +770,8 @@ private:
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
FixupBranch WriteFixupBranch();
template <typename T>
void MOVI2RImpl(ARM64Reg Rd, T imm);
@ -767,27 +779,30 @@ protected:
void Write32(u32 value);
public:
ARM64XEmitter() : m_code(nullptr), m_lastCacheFlushEnd(nullptr) {}
ARM64XEmitter(u8* code_ptr)
ARM64XEmitter() = default;
ARM64XEmitter(u8* code, u8* code_end)
: m_code(code), m_code_end(code_end), m_lastCacheFlushEnd(code)
{
m_code = code_ptr;
m_lastCacheFlushEnd = code_ptr;
}
virtual ~ARM64XEmitter() {}
// 'end' is the address just past the region the emitter may write to; a write that would cross
// it is refused and sets the write-failed flag. 'write_failed' is the initial value of that flag.
void SetCodePtr(u8* ptr, u8* end, bool write_failed = false);
void SetCodePtrUnsafe(u8* ptr);
void SetCodePtrUnsafe(u8* ptr, u8* end, bool write_failed = false);
const u8* GetCodePtr() const;
u8* GetWritableCodePtr();
const u8* GetCodeEnd() const;
u8* GetWritableCodeEnd();
void ReserveCodeSpace(u32 bytes);
u8* AlignCode16();
u8* AlignCodePage();
const u8* GetCodePtr() const;
void FlushIcache();
void FlushIcacheSection(u8* start, u8* end);
u8* GetWritableCodePtr();
// Should be checked after a block of code has been generated to see if the code has been
// successfully written to memory. Do not call the generated code when this returns true!
bool HasWriteFailed() const { return m_write_failed; }
// FixupBranch branching
void SetJumpTarget(FixupBranch const& branch);

View File

@ -9,6 +9,7 @@
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"
#include "Common/MsgHandler.h"
#include "Common/PerformanceCounter.h"
#include "Common/StringUtil.h"
@ -45,7 +46,7 @@ void JitArm64::Init()
{
const size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE;
AllocCodeSpace(CODE_SIZE + child_code_size);
AddChildCodeSpace(&farcode, child_code_size);
AddChildCodeSpace(&m_far_code, child_code_size);
jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
jo.enableBlocklink = true;
@ -68,6 +69,8 @@ void JitArm64::Init()
AllocStack();
GenerateAsm();
ResetFreeMemoryRanges();
}
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
@ -123,15 +126,26 @@ bool JitArm64::HandleStackFault()
// Throws away all generated code and the bookkeeping that refers to it: clears the fault handler
// map and the block cache (including its pending ranges-to-free), clears the near and far code
// space, then calls GenerateAsm() and ResetFreeMemoryRanges() so both regions are tracked as
// free again.
// NOTE(review): both 'farcode' and 'm_far_code' are cleared here, and 'm_handler_to_loc' is
// still referenced; presumably the 'farcode' / 'm_handler_to_loc' lines are pre-change diff
// lines — confirm.
void JitArm64::ClearCache()
{
m_fault_to_handler.clear();
m_handler_to_loc.clear();
blocks.Clear();
blocks.ClearRangesToFree();
// Keeps JIT pages writable for the rest of this scope (going by the type name — confirm).
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ClearCodeSpace();
farcode.ClearCodeSpace();
m_far_code.ClearCodeSpace();
UpdateMemoryAndExceptionOptions();
GenerateAsm();
ResetFreeMemoryRanges();
}
void JitArm64::ResetFreeMemoryRanges()
{
// Set the near and far code regions as unused.
m_free_ranges_near.clear();
m_free_ranges_near.insert(GetWritableCodePtr(), GetWritableCodeEnd());
m_free_ranges_far.clear();
m_free_ranges_far.insert(m_far_code.GetWritableCodePtr(), m_far_code.GetWritableCodeEnd());
}
void JitArm64::Shutdown()
@ -577,7 +591,12 @@ void JitArm64::SingleStep()
pExecAddr();
}
void JitArm64::Jit(u32)
// Compiles the block at em_address. Forwards to the two-argument overload with retrying
// enabled, so a failed code generation may clear the JIT cache and try once more.
void JitArm64::Jit(u32 em_address)
{
  constexpr bool retry_after_cache_clear = true;
  Jit(em_address, retry_after_cache_clear);
}
void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
{
if (m_cleanup_after_stackfault)
{
@ -589,14 +608,31 @@ void JitArm64::Jit(u32)
#endif
}
if (IsAlmostFull() || farcode.IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
{
if (SConfig::GetInstance().bJITNoBlockCache)
ClearCache();
// Check if any code blocks have been freed in the block cache and transfer this information to
// the local rangesets to allow overwriting them with new code.
for (auto range : blocks.GetRangesToFreeNear())
{
auto first_fastmem_area = m_fault_to_handler.upper_bound(range.first);
auto last_fastmem_area = first_fastmem_area;
auto end = m_fault_to_handler.end();
while (last_fastmem_area != end && last_fastmem_area->first <= range.second)
++last_fastmem_area;
m_fault_to_handler.erase(first_fastmem_area, last_fastmem_area);
m_free_ranges_near.insert(range.first, range.second);
}
for (auto range : blocks.GetRangesToFreeFar())
{
m_free_ranges_far.insert(range.first, range.second);
}
blocks.ClearRangesToFree();
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
std::size_t block_size = m_code_buffer.size();
const u32 em_address = PowerPC::ppcState.pc;
if (SConfig::GetInstance().bEnableDebugging)
{
@ -619,12 +655,75 @@ void JitArm64::Jit(u32)
return;
}
if (SetEmitterStateToFreeCodeRegion())
{
u8* near_start = GetWritableCodePtr();
u8* far_start = m_far_code.GetWritableCodePtr();
JitBlock* b = blocks.AllocateBlock(em_address);
DoJit(em_address, b, nextPC);
if (DoJit(em_address, b, nextPC))
{
// Code generation succeeded.
// Mark the memory regions that this code block uses as used in the local rangesets.
u8* near_end = GetWritableCodePtr();
if (near_start != near_end)
m_free_ranges_near.erase(near_start, near_end);
u8* far_end = m_far_code.GetWritableCodePtr();
if (far_start != far_end)
m_free_ranges_far.erase(far_start, far_end);
// Store the used memory regions in the block so we know what to mark as unused when the
// block gets invalidated.
b->near_begin = near_start;
b->near_end = near_end;
b->far_begin = far_start;
b->far_end = far_end;
blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
return;
}
}
void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (clear_cache_and_retry_on_failure)
{
// Code generation failed due to not enough free space in either the near or far code regions.
// Clear the entire JIT cache and retry.
WARN_LOG(POWERPC, "flushing code caches, please report if this happens a lot");
ClearCache();
Jit(em_address, false);
return;
}
PanicAlertT("JIT failed to find code space after a cache clear. This should never happen. Please "
"report this incident on the bug tracker. Dolphin will now exit.");
exit(-1);
}
// Points the near and far emitters at the first range in size order of their respective
// free-range sets (the largest free block, per the container's by-size iteration).
// Returns false when either set is empty so the caller can fall back to clearing the cache.
// The near emitter is repositioned before the far set is inspected, so it may already have
// been moved when this returns false for the far region.
bool JitArm64::SetEmitterStateToFreeCodeRegion()
{
  auto near_candidate = m_free_ranges_near.by_size_begin();
  const bool near_available = near_candidate != m_free_ranges_near.by_size_end();
  if (!near_available)
  {
    WARN_LOG(POWERPC, "Failed to find free memory region in near code region.");
    return false;
  }
  SetCodePtr(near_candidate.from(), near_candidate.to());

  auto far_candidate = m_free_ranges_far.by_size_begin();
  const bool far_available = far_candidate != m_free_ranges_far.by_size_end();
  if (!far_available)
  {
    WARN_LOG(POWERPC, "Failed to find free memory region in far code region.");
    return false;
  }
  m_far_code.SetCodePtr(far_candidate.from(), far_candidate.to());

  return true;
}
bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
{
js.isLastInstruction = false;
js.firstFPInstructionFound = false;
@ -871,9 +970,21 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
WriteExit(nextPC);
}
if (HasWriteFailed() || m_far_code.HasWriteFailed())
{
if (HasWriteFailed())
WARN_LOG(POWERPC, "JIT ran out of space in near code region during code generation.");
if (m_far_code.HasWriteFailed())
WARN_LOG(POWERPC, "JIT ran out of space in far code region during code generation.");
return false;
}
b->codeSize = (u32)(GetCodePtr() - start);
b->originalSize = code_block.m_num_instructions;
FlushIcache();
farcode.FlushIcache();
m_far_code.FlushIcache();
return true;
}

View File

@ -7,6 +7,8 @@
#include <map>
#include <tuple>
#include <rangeset/rangesizeset.h>
#include "Common/Arm64Emitter.h"
#include "Core/PowerPC/CPUCoreBase.h"
@ -39,7 +41,8 @@ public:
void Run() override;
void SingleStep() override;
void Jit(u32) override;
void Jit(u32 em_address) override;
void Jit(u32 em_address, bool clear_cache_and_retry_on_failure);
const char* GetName() const override { return "JITARM64"; }
@ -178,21 +181,6 @@ public:
bool IsFPRStoreSafe(size_t guest_reg) const;
protected:
// Describes one slowmem handler by the registers and flags it was generated for.
// Ordered lexicographically over all fields so it can serve as a std::map key.
struct SlowmemHandler
{
  Arm64Gen::ARM64Reg dest_reg;
  Arm64Gen::ARM64Reg addr_reg;
  BitSet32 gprs;
  BitSet32 fprs;
  u32 flags;

  // Strict weak ordering: compares dest_reg, addr_reg, gprs, fprs and flags, in that order.
  bool operator<(const SlowmemHandler& rhs) const
  {
    const auto lhs_key = std::tie(dest_reg, addr_reg, gprs, fprs, flags);
    const auto rhs_key = std::tie(rhs.dest_reg, rhs.addr_reg, rhs.gprs, rhs.fprs, rhs.flags);
    return lhs_key < rhs_key;
  }
};
struct FastmemArea
{
const u8* fastmem_code;
@ -206,20 +194,23 @@ protected:
// Simple functions to switch between near and far code emitting
// Redirects emission to the far code region: backs up the current (near) write position, region
// end and write-failed flag, loads the far emitter's state into this emitter, aligns the write
// position with AlignCode16() and marks the emitter as being in far code.
// NOTE(review): both 'nearcode'/'farcode'/'m_in_farcode' and 'm_near_code'/'m_far_code'/
// 'm_in_far_code' appear; presumably the former are pre-change diff lines — confirm.
void SwitchToFarCode()
{
nearcode = GetWritableCodePtr();
SetCodePtrUnsafe(farcode.GetWritableCodePtr());
m_near_code = GetWritableCodePtr();
m_near_code_end = GetWritableCodeEnd();
m_near_code_write_failed = HasWriteFailed();
SetCodePtrUnsafe(m_far_code.GetWritableCodePtr(), m_far_code.GetWritableCodeEnd(),
m_far_code.HasWriteFailed());
AlignCode16();
m_in_farcode = true;
m_in_far_code = true;
}
// Redirects emission back to the near code region: stores this emitter's current write position,
// region end and write-failed flag into the far emitter, then restores the near state that
// SwitchToFarCode() backed up and clears the in-far-code marker.
// NOTE(review): both 'farcode'/'nearcode'/'m_in_farcode' and 'm_far_code'/'m_near_code'/
// 'm_in_far_code' appear; presumably the former are pre-change diff lines — confirm.
void SwitchToNearCode()
{
farcode.SetCodePtrUnsafe(GetWritableCodePtr());
SetCodePtrUnsafe(nearcode);
m_in_farcode = false;
m_far_code.SetCodePtrUnsafe(GetWritableCodePtr(), GetWritableCodeEnd(), HasWriteFailed());
SetCodePtrUnsafe(m_near_code, m_near_code_end, m_near_code_write_failed);
m_in_far_code = false;
}
bool IsInFarCode() const { return m_in_farcode; }
bool IsInFarCode() const { return m_in_far_code; }
// Dump a memory range of code
void DumpCode(const u8* start, const u8* end);
@ -238,7 +229,11 @@ protected:
Arm64Gen::FixupBranch CheckIfSafeAddress(Arm64Gen::ARM64Reg addr, Arm64Gen::ARM64Reg tmp1,
Arm64Gen::ARM64Reg tmp2);
void DoJit(u32 em_address, JitBlock* b, u32 nextPC);
bool DoJit(u32 em_address, JitBlock* b, u32 nextPC);
// Finds a free memory region and sets the near and far code emitters to point at that region.
// Returns false if no free memory region can be found for either of the two.
bool SetEmitterStateToFreeCodeRegion();
void DoDownCount();
void Cleanup();
@ -246,6 +241,8 @@ protected:
void AllocStack();
void FreeStack();
void ResetFreeMemoryRanges();
// AsmRoutines
void GenerateAsm();
void GenerateCommonAsm();
@ -296,7 +293,6 @@ protected:
// <Fastmem fault location, slowmem handler location>
std::map<const u8*, FastmemArea> m_fault_to_handler;
std::map<SlowmemHandler, const u8*> m_handler_to_loc;
Arm64GPRCache gpr;
Arm64FPRCache fpr;
@ -304,13 +300,20 @@ protected:
Arm64Gen::ARM64FloatEmitter m_float_emit;
Arm64Gen::ARM64CodeBlock farcode;
u8* nearcode; // Backed up when we switch to far code.
bool m_in_farcode = false;
Arm64Gen::ARM64CodeBlock m_far_code;
bool m_in_far_code = false;
// Backed up when we switch to far code.
u8* m_near_code;
u8* m_near_code_end;
bool m_near_code_write_failed;
bool m_enable_blr_optimization;
bool m_cleanup_after_stackfault = false;
u8* m_stack_base = nullptr;
u8* m_stack_pointer = nullptr;
u8* m_saved_stack_pointer = nullptr;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far;
};

View File

@ -12,6 +12,12 @@ JitArm64BlockCache::JitArm64BlockCache(JitBase& jit) : JitBaseBlockCache{jit}
{
}
// Initializes the base block cache, then empties both pending ranges-to-free lists.
// NOTE(review): the clear comes after the base Init(); presumably so that anything the base
// initialization queues through DestroyBlock() is discarded — confirm.
void JitArm64BlockCache::Init()
{
JitBaseBlockCache::Init();
ClearRangesToFree();
}
void JitArm64BlockCache::WriteLinkBlock(Arm64Gen::ARM64XEmitter& emit,
const JitBlock::LinkData& source, const JitBlock* dest)
{
@ -60,7 +66,7 @@ void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const
{
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
u8* location = source.exitPtrs;
ARM64XEmitter emit(location);
ARM64XEmitter emit(location, location + 12);
WriteLinkBlock(emit, source, dest);
emit.FlushIcache();
@ -69,9 +75,35 @@ void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const
// Overwrites the block's entry points with BRK(0x123), from checkedEntry up to and including the
// instruction at normalEntry, then flushes the instruction cache for what was written.
// NOTE(review): 'emit' is constructed twice (one-argument and two-argument form); presumably
// the one-argument line is the pre-change diff line — confirm.
void JitArm64BlockCache::WriteDestroyBlock(const JitBlock& block)
{
// Only clear the entry points as we might still be within this block.
ARM64XEmitter emit(block.checkedEntry);
// The end bound is normalEntry + 4, i.e. one 32-bit instruction past normalEntry.
ARM64XEmitter emit(block.checkedEntry, block.normalEntry + 4);
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
while (emit.GetWritableCodePtr() <= block.normalEntry)
emit.BRK(0x123);
emit.FlushIcache();
}
// Destroys the block through the base class, then queues the block's near and far code ranges
// so they can be handed back as free space on the next code generation. Empty ranges
// (begin == end) are not queued.
void JitArm64BlockCache::DestroyBlock(JitBlock& block)
{
  JitBaseBlockCache::DestroyBlock(block);

  const bool has_near_code = block.near_begin != block.near_end;
  if (has_near_code)
  {
    m_ranges_to_free_on_next_codegen_near.emplace_back(block.near_begin, block.near_end);
  }

  const bool has_far_code = block.far_begin != block.far_end;
  if (has_far_code)
  {
    m_ranges_to_free_on_next_codegen_far.emplace_back(block.far_begin, block.far_end);
  }
}
// Returns the (begin, end) near-code ranges queued by DestroyBlock() since the last
// ClearRangesToFree(). The reference points at the internal list, so it reflects later changes.
const std::vector<std::pair<u8*, u8*>>& JitArm64BlockCache::GetRangesToFreeNear() const
{
return m_ranges_to_free_on_next_codegen_near;
}
// Returns the (begin, end) far-code ranges queued by DestroyBlock() since the last
// ClearRangesToFree(). The reference points at the internal list, so it reflects later changes.
const std::vector<std::pair<u8*, u8*>>& JitArm64BlockCache::GetRangesToFreeFar() const
{
return m_ranges_to_free_on_next_codegen_far;
}
// Forgets every queued range, for both the far and the near code region.
void JitArm64BlockCache::ClearRangesToFree()
{
  m_ranges_to_free_on_next_codegen_far.clear();
  m_ranges_to_free_on_next_codegen_near.clear();
}

View File

@ -3,6 +3,8 @@
#pragma once
#include <vector>
#include "Common/Arm64Emitter.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
@ -15,10 +17,22 @@ class JitArm64BlockCache : public JitBaseBlockCache
public:
explicit JitArm64BlockCache(JitBase& jit);
void Init() override;
void DestroyBlock(JitBlock& block) override;
const std::vector<std::pair<u8*, u8*>>& GetRangesToFreeNear() const;
const std::vector<std::pair<u8*, u8*>>& GetRangesToFreeFar() const;
void ClearRangesToFree();
void WriteLinkBlock(Arm64Gen::ARM64XEmitter& emit, const JitBlock::LinkData& source,
const JitBlock* dest = nullptr);
private:
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
void WriteDestroyBlock(const JitBlock& block) override;
std::vector<std::pair<u8*, u8*>> m_ranges_to_free_on_next_codegen_near;
std::vector<std::pair<u8*, u8*>> m_ranges_to_free_on_next_codegen_far;
};

View File

@ -121,40 +121,15 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
if (!fastmem || do_farcode)
{
if (fastmem && do_farcode)
{
if (emitting_routine)
{
in_far_code = true;
SwitchToFarCode();
}
else
{
SlowmemHandler handler;
handler.dest_reg = RS;
handler.addr_reg = addr;
handler.gprs = gprs_to_push;
handler.fprs = fprs_to_push;
handler.flags = flags;
if (!emitting_routine)
{
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
auto handler_loc_iter = m_handler_to_loc.find(handler);
if (handler_loc_iter == m_handler_to_loc.end())
{
in_far_code = true;
SwitchToFarCode();
const u8* handler_loc = GetCodePtr();
m_handler_to_loc[handler] = handler_loc;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
}
else
{
const u8* handler_loc = handler_loc_iter->second;
fastmem_area->fastmem_code = fastmem_start;
fastmem_area->slowmem_code = handler_loc;
return;
}
fastmem_area->slowmem_code = GetCodePtr();
}
}
@ -294,7 +269,7 @@ bool JitArm64::HandleFastmemFault(uintptr_t access_address, SContext* ctx)
return false;
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ARM64XEmitter emitter(const_cast<u8*>(fastmem_area_start));
ARM64XEmitter emitter(const_cast<u8*>(fastmem_area_start), const_cast<u8*>(fastmem_area_end));
emitter.BL(slow_handler_iter->second.slowmem_code);

View File

@ -35,7 +35,7 @@ public:
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
AllocCodeSpace(4096);
AddChildCodeSpace(&farcode, 2048);
AddChildCodeSpace(&m_far_code, 2048);
gpr.Init(this);
fpr.Init(this);