Jit: Define new terms related to fastmem

Dolphin's JITs have a minor terminology problem: The term "fastmem" can
refer either to the system of switching between a fast path and a slow
path using backpatching, or to the fast path itself. To hopefully make
things clearer, I'm adding some new terms and defining both the old and
new terms as follows:

Fastmem: The system of switching from a fast path to a slow path by
backpatching when an invalid memory access occurs.

Fast access: A code path that accesses guest memory without calling C++
code.

Slow access: A code path that accesses guest memory by calling C++ code.
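
To illustrate how the three terms relate, here is a minimal C++ sketch. It is
not Dolphin's actual implementation: g_fastmem_base, FastRead32, and
SlowRead32 are hypothetical names, and the real JIT emits the fast access as
machine code rather than calling a C++ helper.

#include <cstdint>
#include <cstring>

// Hypothetical base of the host-side mapping of the guest address space.
static uint8_t* g_fastmem_base = nullptr;

// Slow access: reaches guest memory by calling C++ code, which can handle
// address translation, MMIO, exceptions, and other special cases.
uint32_t SlowRead32(uint32_t guest_addr);

// Fast access: reaches guest memory without calling C++ code; the JIT emits
// the equivalent of this single host load.
inline uint32_t FastRead32(uint32_t guest_addr)
{
  uint32_t value;
  std::memcpy(&value, g_fastmem_base + guest_addr, sizeof(value));
  return value;
}

// Fastmem: the JIT emits the fast access by default. When it faults because
// the guest address isn't backed by the mapping, the host fault handler
// backpatches the faulting code into a call to the slow access, so later
// executions take the slow path without faulting again.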
JosJuice 2023-11-02 20:54:34 +01:00
parent 17122f171a
commit 482da7975b
9 changed files with 54 additions and 51 deletions

Source/Core/Core/PowerPC/Jit64/Jit.cpp

@@ -888,9 +888,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
   js.constantGqrValid = BitSet8();

   // Assume that GQR values don't change often at runtime. Many paired-heavy games use largely float
-  // loads and stores,
-  // which are significantly faster when inlined (especially in MMU mode, where this lets them use
-  // fastmem).
+  // loads and stores, which are significantly faster when inlined (especially in MMU mode, where
+  // this lets them use fastmem).
   if (js.pairedQuantizeAddresses.find(js.blockStart) == js.pairedQuantizeAddresses.end())
   {
     // If there are GQRs used but not set, we'll treat those as constant and optimize them

Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp

@@ -319,12 +319,12 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
 void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
                                  s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
 {
-  bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
+  bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0;

   auto& js = m_jit.js;
   registersInUse[reg_value] = false;
   if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) &&
-      !slowmem)
+      !force_slow_access)
   {
     u8* backpatchStart = GetWritableCodePtr();
     MovInfo mov;
@@ -373,7 +373,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
   FixupBranch exit;
   const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR;
   const bool fast_check_address =
-      !slowmem && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
+      !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
   if (fast_check_address)
   {
     FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse);
@@ -491,14 +491,14 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
                                      BitSet32 registersInUse, int flags)
 {
   bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
-  bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
+  bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0;

   // set the correct immediate format
   reg_value = FixImmediate(accessSize, reg_value);

   auto& js = m_jit.js;
   if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) &&
-      !slowmem)
+      !force_slow_access)
   {
     u8* backpatchStart = GetWritableCodePtr();
     MovInfo mov;
@@ -543,7 +543,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
   FixupBranch exit;
   const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR;
   const bool fast_check_address =
-      !slowmem && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
+      !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
   if (fast_check_address)
   {
     FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse);

Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h

@@ -76,8 +76,8 @@ public:
     // This indicates that the write being generated cannot be patched (and thus can't use fastmem)
     SAFE_LOADSTORE_NO_FASTMEM = 4,
     SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
-    // Force slowmem (used when generating fallbacks in trampolines)
-    SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
+    // Always call into C++ (used when generating fallbacks in trampolines)
+    SAFE_LOADSTORE_FORCE_SLOW_ACCESS = 16,
     SAFE_LOADSTORE_DR_ON = 32,
     // Generated from a context that doesn't have the PC of the instruction that caused it
     SAFE_LOADSTORE_NO_UPDATE_PC = 64,

Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp

@@ -44,7 +44,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info)
   const u8* trampoline = GetCodePtr();

   SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse,
-                info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
+                info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS);

   JMP(info.start + info.len, Jump::Near);

@@ -63,7 +63,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info)
   // check anyway.

   SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
-                    info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
+                    info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS);

   JMP(info.start + info.len, Jump::Near);

Source/Core/Core/PowerPC/JitArm64/Jit.h

@@ -189,8 +189,8 @@ public:
 protected:
   struct FastmemArea
   {
-    const u8* fastmem_code;
-    const u8* slowmem_code;
+    const u8* fast_access_code;
+    const u8* slow_access_code;
   };

   void SetBlockLinkingEnabled(bool enabled);
@@ -229,10 +229,10 @@ protected:
   {
     // Always calls the slow C++ code. For performance reasons, should generally only be used if
     // the guest address is known in advance and IsOptimizableRAMAddress returns false for it.
-    AlwaysSafe,
+    AlwaysSlowAccess,
     // Only emits fast access code. Must only be used if the guest address is known in advance
     // and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
-    AlwaysUnsafe,
+    AlwaysFastAccess,
     // Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem):
     // Tries to run fast access code, and if that fails, uses backpatching to replace the code
     // with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
@@ -252,20 +252,20 @@ protected:
   // Store float: X1 Q0
   // Load float: X0
   //
-  // If mode == AlwaysUnsafe, the addr argument can be any register.
+  // If mode == AlwaysFastAccess, the addr argument can be any register.
   // Otherwise it must be the register listed in the table above.
   //
   // Additional scratch registers are used in the following situations:
   //
   // emitting_routine && mode == Auto: X2
   // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
-  // emitting_routine && mode != AlwaysSafe && !jo.fastmem: X3
-  // mode != AlwaysSafe && !jo.fastmem: X2
-  // !emitting_routine && mode != AlwaysSafe && !jo.fastmem: X30
+  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
+  // mode != AlwaysSlowAccess && !jo.fastmem: X2
+  // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
   // !emitting_routine && mode == Auto && jo.fastmem: X30
   //
   // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
-  // may be clobbered if mode != AlwaysUnsafe.
+  // may be clobbered if mode != AlwaysFastAccess.
   void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
                             Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
                             BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
@@ -356,7 +356,7 @@ protected:
   void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg);
   void Force25BitPrecision(Arm64Gen::ARM64Reg output, Arm64Gen::ARM64Reg input);

-  // <Fastmem fault location, slowmem handler location>
+  // <Fast path fault location, slow path handler location>
   std::map<const u8*, FastmemArea> m_fault_to_handler{};
   Arm64GPRCache gpr;
   Arm64FPRCache fpr;

Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp

@@ -60,16 +60,16 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);

   if (m_accurate_cpu_cache_enabled)
-    mode = MemAccessMode::AlwaysSafe;
+    mode = MemAccessMode::AlwaysSlowAccess;

-  const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
-  const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
+  const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
+  const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;

   bool in_far_code = false;
-  const u8* fastmem_start = GetCodePtr();
-  std::optional<FixupBranch> slowmem_fixup;
+  const u8* fast_access_start = GetCodePtr();
+  std::optional<FixupBranch> slow_access_fixup;

-  if (emit_fastmem)
+  if (emit_fast_access)
   {
     ARM64Reg memory_base = MEM_REG;
     ARM64Reg memory_offset = addr;
@@ -84,21 +84,21 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
       LDR(memory_base, MEM_REG, ArithOption(temp, true));

-      if (emit_slowmem)
+      if (emit_slow_access)
       {
         FixupBranch pass = CBNZ(memory_base);
-        slowmem_fixup = B();
+        slow_access_fixup = B();
         SetJumpTarget(pass);
       }

       AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
     }
   }
-  else if (emit_slowmem && emitting_routine)
+  else if (emit_slow_access && emitting_routine)
   {
     const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
     const ARM64Reg temp2 = ARM64Reg::W2;
-    slowmem_fixup = CheckIfSafeAddress(addr, temp1, temp2);
+    slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
   }

   if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
@@ -147,27 +147,27 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
     }
   }

-  const u8* fastmem_end = GetCodePtr();
+  const u8* fast_access_end = GetCodePtr();

-  if (emit_slowmem)
+  if (emit_slow_access)
   {
     const bool memcheck = jo.memcheck && !emitting_routine;

-    if (emit_fastmem)
+    if (emit_fast_access)
     {
       in_far_code = true;
       SwitchToFarCode();
       if (jo.fastmem && !emitting_routine)
       {
-        FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
-        fastmem_area->fastmem_code = fastmem_start;
-        fastmem_area->slowmem_code = GetCodePtr();
+        FastmemArea* fastmem_area = &m_fault_to_handler[fast_access_end];
+        fastmem_area->fast_access_code = fast_access_start;
+        fastmem_area->slow_access_code = GetCodePtr();
       }
     }

-    if (slowmem_fixup)
-      SetJumpTarget(*slowmem_fixup);
+    if (slow_access_fixup)
+      SetJumpTarget(*slow_access_fixup);

     const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0;
     const int temp_gpr_index = DecodeReg(temp_gpr);
@@ -304,7 +304,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,

   if (in_far_code)
   {
-    if (slowmem_fixup)
+    if (slow_access_fixup)
     {
       FixupBranch done = B();
       SwitchToNearCode();
@@ -327,7 +327,7 @@ bool JitArm64::HandleFastmemFault(SContext* ctx)
   if (slow_handler_iter == m_fault_to_handler.end())
     return false;

-  const u8* fastmem_area_start = slow_handler_iter->second.fastmem_code;
+  const u8* fastmem_area_start = slow_handler_iter->second.fast_access_code;
   const u8* fastmem_area_end = slow_handler_iter->first;

   // no overlapping fastmem area found
@@ -337,7 +337,7 @@ bool JitArm64::HandleFastmemFault(SContext* ctx)
   const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
   ARM64XEmitter emitter(const_cast<u8*>(fastmem_area_start), const_cast<u8*>(fastmem_area_end));

-  emitter.BL(slow_handler_iter->second.slowmem_code);
+  emitter.BL(slow_handler_iter->second.slow_access_code);

   while (emitter.GetCodePtr() < fastmem_area_end)
     emitter.NOP();

Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp

@@ -137,7 +137,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, dest_reg, XA, regs_in_use,
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
                          fprs_in_use);
   }
   else if (mmio_address)
@@ -309,7 +309,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, RS, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use);
   }
   else if (mmio_address)
   {

Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp

@@ -176,7 +176,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)

   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, VD, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use);
   }
   else
   {
@@ -402,12 +402,14 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     else if (m_mmu.IsOptimizableRAMAddress(imm_addr))
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, V0, XA, regs_in_use, fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,
+                           fprs_in_use);
     }
     else
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSafe, V0, XA, regs_in_use, fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use,
+                           fprs_in_use);
     }
   }
   else

Source/Core/Core/PowerPC/MMU.cpp

@@ -1564,7 +1564,8 @@ void MMU::UpdateBATs(BatTable& bat_table, u32 base_spr)
         valid_bit |= BAT_WI_BIT;

       // Enable fastmem mappings for cached memory. There are quirks related to uncached memory
-      // that fastmem doesn't emulate properly (though no normal games are known to rely on them).
+      // that can't be correctly emulated by fast accesses, so we don't map uncached memory.
+      // (No normal games are known to rely on the quirks, though.)
       if (!wi)
       {
         if (m_memory.GetFakeVMEM() && (physical_address & 0xFE000000) == 0x7E000000)
@@ -1587,7 +1588,8 @@ void MMU::UpdateBATs(BatTable& bat_table, u32 base_spr)
         }
       }

-      // Fastmem doesn't support memchecks, so disable it for all overlapping virtual pages.
+      // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
+      // mappings for all overlapping virtual pages.
       if (m_power_pc.GetMemChecks().OverlapsMemcheck(virtual_address, BAT_PAGE_SIZE))
         valid_bit &= ~BAT_PHYSICAL_BIT;