Jit: Define new terms related to fastmem
Dolphin's JITs have a minor terminology problem: The term "fastmem" can refer
to either the system of switching between a fast path and a slow path using
backpatching, or to the fast path itself. To hopefully make things clearer,
I'm adding some new terms, defining the old and new terms as follows:

Fastmem: The system of switching from a fast path to a slow path by
backpatching when an invalid memory access occurs.

Fast access: A code path that accesses guest memory without calling C++ code.

Slow access: A code path that accesses guest memory by calling C++ code.
This commit is contained in:
parent 17122f171a
commit 482da7975b
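Before the diff itself, it may help to see the three terms side by side in ordinary C++. The sketch below is a minimal, self-contained model and not Dolphin code: all names in it (g_host_ram, FastRead, SlowRead, g_read, OnFault) are hypothetical, and a mutable function pointer stands in for what the JIT really does, which is rewriting emitted machine code.

#include <cstdint>
#include <cstdio>

// Hypothetical guest RAM that happens to be directly backed by host memory.
static uint8_t g_host_ram[0x10000];

// "Fast access": a guest-memory read done with a plain host load and no call
// into C++. Only valid for addresses actually backed by g_host_ram.
static uint8_t FastRead(uint32_t guest_addr)
{
  return g_host_ram[guest_addr & 0xFFFF];
}

// "Slow access": a guest-memory read that calls C++ code, which can also
// handle MMIO, unmapped pages, exceptions, and so on.
static uint8_t SlowRead(uint32_t guest_addr)
{
  if (guest_addr < 0x10000)
    return g_host_ram[guest_addr];
  std::printf("read at %08X handled by C++ fallback\n", guest_addr);
  return 0;
}

// "Fastmem": the system tying the two together. The JIT emits the fast
// access; if that faults at runtime, the fault handler backpatches the
// access site into a call to the slow access.
static uint8_t (*g_read)(uint32_t) = &FastRead;

static void OnFault()
{
  g_read = &SlowRead;  // the "backpatch", modeled as a pointer swap
}

int main()
{
  g_host_ram[0x100] = 42;
  std::printf("%u\n", g_read(0x100));       // fast access succeeds
  OnFault();                                // pretend the next read faulted
  std::printf("%u\n", g_read(0xCC000000));  // now served by the slow access
}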
@@ -888,9 +888,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     js.constantGqrValid = BitSet8();
 
     // Assume that GQR values don't change often at runtime. Many paired-heavy games use largely float
-    // loads and stores,
-    // which are significantly faster when inlined (especially in MMU mode, where this lets them use
-    // fastmem).
+    // loads and stores, which are significantly faster when inlined (especially in MMU mode, where
+    // this lets them use fastmem).
     if (js.pairedQuantizeAddresses.find(js.blockStart) == js.pairedQuantizeAddresses.end())
     {
       // If there are GQRs used but not set, we'll treat those as constant and optimize them
@@ -319,12 +319,12 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
 void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
                                  s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
 {
-  bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
+  bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0;
 
   auto& js = m_jit.js;
   registersInUse[reg_value] = false;
   if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) &&
-      !slowmem)
+      !force_slow_access)
   {
     u8* backpatchStart = GetWritableCodePtr();
     MovInfo mov;
@@ -373,7 +373,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
   FixupBranch exit;
   const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR;
   const bool fast_check_address =
-      !slowmem && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
+      !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
   if (fast_check_address)
   {
     FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse);
@@ -491,14 +491,14 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
                                      BitSet32 registersInUse, int flags)
 {
   bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
-  bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
+  bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0;
 
   // set the correct immediate format
   reg_value = FixImmediate(accessSize, reg_value);
 
   auto& js = m_jit.js;
   if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) &&
-      !slowmem)
+      !force_slow_access)
   {
     u8* backpatchStart = GetWritableCodePtr();
     MovInfo mov;
@@ -543,7 +543,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
   FixupBranch exit;
   const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR;
   const bool fast_check_address =
-      !slowmem && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
+      !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
   if (fast_check_address)
   {
     FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse);
@@ -76,8 +76,8 @@ public:
   // This indicates that the write being generated cannot be patched (and thus can't use fastmem)
   SAFE_LOADSTORE_NO_FASTMEM = 4,
   SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
-  // Force slowmem (used when generating fallbacks in trampolines)
-  SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
+  // Always call into C++ (used when generating fallbacks in trampolines)
+  SAFE_LOADSTORE_FORCE_SLOW_ACCESS = 16,
   SAFE_LOADSTORE_DR_ON = 32,
   // Generated from a context that doesn't have the PC of the instruction that caused it
   SAFE_LOADSTORE_NO_UPDATE_PC = 64,
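The renamed flag combines with its neighbors as plain bit arithmetic: the trampoline generators in the next two hunks OR it into the caller's flags, and SafeLoadToReg/SafeWriteRegToReg test it as shown earlier. A small self-contained sketch of that round trip, with the enum values copied from the hunk above (everything else is illustrative):

#include <cstdio>

enum : unsigned
{
  SAFE_LOADSTORE_NO_FASTMEM = 4,
  SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
  SAFE_LOADSTORE_FORCE_SLOW_ACCESS = 16,
  SAFE_LOADSTORE_DR_ON = 32,
  SAFE_LOADSTORE_NO_UPDATE_PC = 64,
};

int main()
{
  // A trampoline ORs the force-slow-access bit into the original flags...
  unsigned flags = SAFE_LOADSTORE_DR_ON;
  flags |= SAFE_LOADSTORE_FORCE_SLOW_ACCESS;

  // ...and the emitters test the bit exactly like the hunks above:
  const bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0;
  std::printf("force_slow_access = %d\n", force_slow_access);
}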
@@ -44,7 +44,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info)
   const u8* trampoline = GetCodePtr();
 
   SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse,
-                info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
+                info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS);
 
   JMP(info.start + info.len, Jump::Near);
 
@@ -63,7 +63,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info)
   // check anyway.
 
   SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
-                    info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
+                    info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS);
 
   JMP(info.start + info.len, Jump::Near);
 
@@ -189,8 +189,8 @@ public:
 protected:
   struct FastmemArea
   {
-    const u8* fastmem_code;
-    const u8* slowmem_code;
+    const u8* fast_access_code;
+    const u8* slow_access_code;
   };
 
   void SetBlockLinkingEnabled(bool enabled);
@@ -229,10 +229,10 @@ protected:
   {
     // Always calls the slow C++ code. For performance reasons, should generally only be used if
     // the guest address is known in advance and IsOptimizableRAMAddress returns false for it.
-    AlwaysSafe,
+    AlwaysSlowAccess,
     // Only emits fast access code. Must only be used if the guest address is known in advance
     // and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
-    AlwaysUnsafe,
+    AlwaysFastAccess,
     // Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem):
     // Tries to run fast access code, and if that fails, uses backpatching to replace the code
     // with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
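The load/store hunks near the end of the commit show how callers pick one of these modes depending on whether the guest address is known at compile time. A condensed, hypothetical model of that decision; ChooseMode is not a real Dolphin function, and IsOptimizableRAMAddress is reduced here to a boolean parameter:

#include <cstdio>

enum class MemAccessMode
{
  AlwaysSlowAccess,  // always call the C++ handler
  AlwaysFastAccess,  // only emit the direct host load/store
  Auto,              // fast path plus backpatching / runtime checks
};

MemAccessMode ChooseMode(bool is_immediate_addr, bool optimizable_ram)
{
  if (is_immediate_addr && optimizable_ram)
    return MemAccessMode::AlwaysFastAccess;  // address known safe in advance
  if (is_immediate_addr && !optimizable_ram)
    return MemAccessMode::AlwaysSlowAccess;  // address known unsafe in advance
  return MemAccessMode::Auto;                // decide at runtime
}

int main()
{
  std::printf("%d\n", static_cast<int>(ChooseMode(true, true)));    // 1 (AlwaysFastAccess)
  std::printf("%d\n", static_cast<int>(ChooseMode(false, false)));  // 2 (Auto)
}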
@@ -252,20 +252,20 @@ protected:
   // Store float: X1 Q0
   // Load float: X0
   //
-  // If mode == AlwaysUnsafe, the addr argument can be any register.
+  // If mode == AlwaysFastAccess, the addr argument can be any register.
   // Otherwise it must be the register listed in the table above.
   //
   // Additional scratch registers are used in the following situations:
   //
   // emitting_routine && mode == Auto: X2
   // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
-  // emitting_routine && mode != AlwaysSafe && !jo.fastmem: X3
-  // mode != AlwaysSafe && !jo.fastmem: X2
-  // !emitting_routine && mode != AlwaysSafe && !jo.fastmem: X30
+  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
+  // mode != AlwaysSlowAccess && !jo.fastmem: X2
+  // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
   // !emitting_routine && mode == Auto && jo.fastmem: X30
   //
   // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
-  // may be clobbered if mode != AlwaysUnsafe.
+  // may be clobbered if mode != AlwaysFastAccess.
   void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
                             Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
                             BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
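The scratch-register table above reads as one condition per row. The helper below re-encodes those rows directly, purely as an illustration of the renamed conditions; it is not part of Dolphin, and FLAG_STORE is a stand-in for BackPatchInfo::FLAG_STORE.

#include <cstdio>
#include <set>
#include <string>

enum class MemAccessMode { AlwaysSlowAccess, AlwaysFastAccess, Auto };
constexpr unsigned FLAG_STORE = 1;  // stand-in for BackPatchInfo::FLAG_STORE

// One insert per row of the comment above; a set dedupes repeated registers.
std::set<std::string> ScratchRegs(unsigned flags, MemAccessMode mode,
                                  bool fastmem, bool emitting_routine)
{
  const bool auto_mode = mode == MemAccessMode::Auto;
  const bool has_fast_path = mode != MemAccessMode::AlwaysSlowAccess;
  std::set<std::string> regs;
  if (emitting_routine && auto_mode)
    regs.insert("X2");
  if (emitting_routine && auto_mode && !(flags & FLAG_STORE))
    regs.insert("X3");
  if (emitting_routine && has_fast_path && !fastmem)
    regs.insert("X3");
  if (has_fast_path && !fastmem)
    regs.insert("X2");
  if (!emitting_routine && has_fast_path && !fastmem)
    regs.insert("X30");
  if (!emitting_routine && auto_mode && fastmem)
    regs.insert("X30");
  return regs;
}

int main()
{
  // Typical block emission with fastmem enabled: only X30 is needed.
  for (const std::string& reg :
       ScratchRegs(FLAG_STORE, MemAccessMode::Auto, true, false))
    std::printf("%s\n", reg.c_str());
}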
@@ -356,7 +356,7 @@ protected:
   void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg);
   void Force25BitPrecision(Arm64Gen::ARM64Reg output, Arm64Gen::ARM64Reg input);
 
-  // <Fastmem fault location, slowmem handler location>
+  // <Fast path fault location, slow path handler location>
   std::map<const u8*, FastmemArea> m_fault_to_handler{};
   Arm64GPRCache gpr;
   Arm64FPRCache fpr;
@@ -60,16 +60,16 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);
 
   if (m_accurate_cpu_cache_enabled)
-    mode = MemAccessMode::AlwaysSafe;
+    mode = MemAccessMode::AlwaysSlowAccess;
 
-  const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
-  const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
+  const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
+  const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;
 
   bool in_far_code = false;
-  const u8* fastmem_start = GetCodePtr();
-  std::optional<FixupBranch> slowmem_fixup;
+  const u8* fast_access_start = GetCodePtr();
+  std::optional<FixupBranch> slow_access_fixup;
 
-  if (emit_fastmem)
+  if (emit_fast_access)
   {
     ARM64Reg memory_base = MEM_REG;
     ARM64Reg memory_offset = addr;
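The two booleans derived from mode enumerate all three cases: AlwaysFastAccess emits only the fast path, AlwaysSlowAccess only the slow path, and Auto emits both. A tiny self-contained check of that mapping (illustrative only):

#include <cassert>
#include <initializer_list>

enum class MemAccessMode { AlwaysSlowAccess, AlwaysFastAccess, Auto };

int main()
{
  for (MemAccessMode mode : {MemAccessMode::AlwaysSlowAccess,
                             MemAccessMode::AlwaysFastAccess,
                             MemAccessMode::Auto})
  {
    // Same derivations as in the hunk above.
    const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
    const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;

    assert(emit_fast_access || emit_slow_access);  // something is always emitted
    if (mode == MemAccessMode::Auto)
      assert(emit_fast_access && emit_slow_access);  // Auto emits both paths
  }
}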
@@ -84,21 +84,21 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
       LDR(memory_base, MEM_REG, ArithOption(temp, true));
 
-      if (emit_slowmem)
+      if (emit_slow_access)
       {
         FixupBranch pass = CBNZ(memory_base);
-        slowmem_fixup = B();
+        slow_access_fixup = B();
         SetJumpTarget(pass);
       }
 
       AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
     }
-    else if (emit_slowmem && emitting_routine)
+    else if (emit_slow_access && emitting_routine)
     {
       const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
       const ARM64Reg temp2 = ARM64Reg::W2;
 
-      slowmem_fixup = CheckIfSafeAddress(addr, temp1, temp2);
+      slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
     }
 
     if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
@@ -147,27 +147,27 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
     }
   }
-  const u8* fastmem_end = GetCodePtr();
+  const u8* fast_access_end = GetCodePtr();
 
-  if (emit_slowmem)
+  if (emit_slow_access)
   {
     const bool memcheck = jo.memcheck && !emitting_routine;
 
-    if (emit_fastmem)
+    if (emit_fast_access)
     {
       in_far_code = true;
       SwitchToFarCode();
 
       if (jo.fastmem && !emitting_routine)
       {
-        FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
-        fastmem_area->fastmem_code = fastmem_start;
-        fastmem_area->slowmem_code = GetCodePtr();
+        FastmemArea* fastmem_area = &m_fault_to_handler[fast_access_end];
+        fastmem_area->fast_access_code = fast_access_start;
+        fastmem_area->slow_access_code = GetCodePtr();
       }
     }
 
-    if (slowmem_fixup)
-      SetJumpTarget(*slowmem_fixup);
+    if (slow_access_fixup)
+      SetJumpTarget(*slow_access_fixup);
 
     const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0;
     const int temp_gpr_index = DecodeReg(temp_gpr);
@@ -304,7 +304,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 
   if (in_far_code)
   {
-    if (slowmem_fixup)
+    if (slow_access_fixup)
     {
       FixupBranch done = B();
       SwitchToNearCode();
@@ -327,7 +327,7 @@ bool JitArm64::HandleFastmemFault(SContext* ctx)
   if (slow_handler_iter == m_fault_to_handler.end())
     return false;
 
-  const u8* fastmem_area_start = slow_handler_iter->second.fastmem_code;
+  const u8* fastmem_area_start = slow_handler_iter->second.fast_access_code;
   const u8* fastmem_area_end = slow_handler_iter->first;
 
   // no overlapping fastmem area found
@@ -337,7 +337,7 @@ bool JitArm64::HandleFastmemFault(SContext* ctx)
   const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
   ARM64XEmitter emitter(const_cast<u8*>(fastmem_area_start), const_cast<u8*>(fastmem_area_end));
 
-  emitter.BL(slow_handler_iter->second.slowmem_code);
+  emitter.BL(slow_handler_iter->second.slow_access_code);
 
   while (emitter.GetCodePtr() < fastmem_area_end)
     emitter.NOP();
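What HandleFastmemFault does here (overwrite the faulting fast-access code with a branch-and-link to the slow-access handler, then pad the remainder with NOPs so the region keeps its size) can be modeled with a byte buffer. A toy sketch only; the real code emits AArch64 opcodes through ARM64XEmitter:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
  uint8_t code[16];
  std::memset(code, 0xAA, sizeof(code));  // 0xAA: pretend fast-access code

  const uint8_t kBranchToSlowAccess = 0xBB;  // stand-in for BL slow_access_code
  const uint8_t kNop = 0x00;                 // stand-in for NOP

  uint8_t* p = code;                         // fastmem_area_start
  uint8_t* const end = code + sizeof(code);  // fastmem_area_end

  *p++ = kBranchToSlowAccess;  // emitter.BL(...)
  while (p < end)              // while (emitter.GetCodePtr() < fastmem_area_end)
    *p++ = kNop;               //   emitter.NOP();

  for (uint8_t byte : code)
    std::printf("%02X ", byte);
  std::printf("\n");  // BB 00 00 ... : one branch, then NOP padding
}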
@@ -137,7 +137,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, dest_reg, XA, regs_in_use,
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
                          fprs_in_use);
   }
   else if (mmio_address)
@@ -309,7 +309,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, RS, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use);
   }
   else if (mmio_address)
   {
@@ -176,7 +176,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
 
   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, VD, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use);
   }
   else
   {
@@ -402,12 +402,14 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     else if (m_mmu.IsOptimizableRAMAddress(imm_addr))
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, V0, XA, regs_in_use, fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,
+                           fprs_in_use);
     }
     else
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSafe, V0, XA, regs_in_use, fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use,
+                           fprs_in_use);
     }
   }
   else
@@ -1564,7 +1564,8 @@ void MMU::UpdateBATs(BatTable& bat_table, u32 base_spr)
         valid_bit |= BAT_WI_BIT;
 
       // Enable fastmem mappings for cached memory. There are quirks related to uncached memory
-      // that fastmem doesn't emulate properly (though no normal games are known to rely on them).
+      // that can't be correctly emulated by fast accesses, so we don't map uncached memory.
+      // (No normal games are known to rely on the quirks, though.)
      if (!wi)
      {
        if (m_memory.GetFakeVMEM() && (physical_address & 0xFE000000) == 0x7E000000)
@@ -1587,7 +1588,8 @@ void MMU::UpdateBATs(BatTable& bat_table, u32 base_spr)
         }
       }
 
-      // Fastmem doesn't support memchecks, so disable it for all overlapping virtual pages.
+      // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
+      // mappings for all overlapping virtual pages.
       if (m_power_pc.GetMemChecks().OverlapsMemcheck(virtual_address, BAT_PAGE_SIZE))
         valid_bit &= ~BAT_PHYSICAL_BIT;
 
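Both comment changes in MMU::UpdateBATs describe the same mechanism: a BAT entry only permits fast accesses while its physical bit is set, so dropping the bit (for uncached pages, or pages overlapping a memcheck) forces every access to that page down the slow path. A simplified sketch: the bit names are Dolphin's, but the values and control flow here are assumptions.

#include <cstdint>
#include <cstdio>

constexpr uint32_t BAT_PHYSICAL_BIT = 1 << 1;  // assumed value: "fastmem-backed"
constexpr uint32_t BAT_WI_BIT = 1 << 2;        // assumed value: write-through/cache-inhibited

int main()
{
  uint32_t valid_bit = BAT_PHYSICAL_BIT;  // page starts out fastmem-backed

  const bool wi = false;               // uncached page?
  const bool overlaps_memcheck = true;  // memcheck set on this page?

  if (wi)  // uncached pages never get a fastmem mapping...
    valid_bit = (valid_bit & ~BAT_PHYSICAL_BIT) | BAT_WI_BIT;
  if (overlaps_memcheck)  // ...and neither do memcheck'd pages
    valid_bit &= ~BAT_PHYSICAL_BIT;  // same operation as in the hunk above

  std::printf("fast access allowed: %d\n", (valid_bit & BAT_PHYSICAL_BIT) != 0);
}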