From 482da7975bad0d0be95e6bd808ee0b6b21ae37c1 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 2 Nov 2023 20:54:34 +0100 Subject: [PATCH] Jit: Define new terms related to fastmem Dolphin's JITs have a minor terminology problem: The term "fastmem" can refer to either the system of switching between a fast path and a slow path using backpatching, or to the fast path itself. To hopefully make things clearer, I'm adding some new terms, defining the old and new terms as follows: Fastmem: The system of switching from a fast path to a slow path by backpatching when an invalid memory access occurs. Fast access: A code path that accesses guest memory without calling C++ code. Slow access: A code path that accesses guest memory by calling C++ code. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 5 +-- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 12 +++--- .../Core/PowerPC/Jit64Common/EmuCodeBlock.h | 4 +- .../PowerPC/Jit64Common/TrampolineCache.cpp | 4 +- Source/Core/Core/PowerPC/JitArm64/Jit.h | 20 ++++----- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 42 +++++++++---------- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 4 +- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 8 ++-- Source/Core/Core/PowerPC/MMU.cpp | 6 ++- 9 files changed, 54 insertions(+), 51 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index ea65a5c6f0..9b089196b8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -888,9 +888,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) js.constantGqrValid = BitSet8(); // Assume that GQR values don't change often at runtime. Many paired-heavy games use largely float - // loads and stores, - // which are significantly faster when inlined (especially in MMU mode, where this lets them use - // fastmem). + // loads and stores, which are significantly faster when inlined (especially in MMU mode, where + // this lets them use fastmem). 
if (js.pairedQuantizeAddresses.find(js.blockStart) == js.pairedQuantizeAddresses.end()) { // If there are GQRs used but not set, we'll treat those as constant and optimize them diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 53bae65dd4..406fb6050f 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -319,12 +319,12 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags) { - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0; auto& js = m_jit.js; registersInUse[reg_value] = false; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !force_slow_access) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -373,7 +373,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, FixupBranch exit; const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR; const bool fast_check_address = - !slowmem && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; + !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; if (fast_check_address) { FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse); @@ -491,14 +491,14 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces BitSet32 registersInUse, int flags) { bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; + bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0; // set the correct immediate format reg_value = FixImmediate(accessSize, reg_value); auto& js = m_jit.js; if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) && - !slowmem) + !force_slow_access) { u8* backpatchStart = GetWritableCodePtr(); MovInfo mov; @@ -543,7 +543,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces FixupBranch exit; const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR; const bool fast_check_address = - !slowmem && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; + !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; if (fast_check_address) { FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 5522f920d3..ba9433134f 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -76,8 +76,8 @@ public: // This indicates that the write being generated cannot be patched (and thus can't use fastmem) SAFE_LOADSTORE_NO_FASTMEM = 4, SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8, - // Force slowmem (used when generating fallbacks in trampolines) - SAFE_LOADSTORE_FORCE_SLOWMEM = 16, + // Always call into C++ (used when generating fallbacks in trampolines) + SAFE_LOADSTORE_FORCE_SLOW_ACCESS = 16, SAFE_LOADSTORE_DR_ON = 32, // Generated from a context that doesn't have the PC of the instruction that 
caused it SAFE_LOADSTORE_NO_UPDATE_PC = 64, diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp index 245fea4cae..186868f7a8 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp @@ -44,7 +44,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info) const u8* trampoline = GetCodePtr(); SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse, - info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM); + info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS); JMP(info.start + info.len, Jump::Near); @@ -63,7 +63,7 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info) // check anyway. SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset, - info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM); + info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOW_ACCESS); JMP(info.start + info.len, Jump::Near); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index e8056fb3d8..3b29bfc0fb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -189,8 +189,8 @@ public: protected: struct FastmemArea { - const u8* fastmem_code; - const u8* slowmem_code; + const u8* fast_access_code; + const u8* slow_access_code; }; void SetBlockLinkingEnabled(bool enabled); @@ -229,10 +229,10 @@ protected: { // Always calls the slow C++ code. For performance reasons, should generally only be used if // the guest address is known in advance and IsOptimizableRAMAddress returns false for it. - AlwaysSafe, + AlwaysSlowAccess, // Only emits fast access code. Must only be used if the guest address is known in advance // and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash! - AlwaysUnsafe, + AlwaysFastAccess, // Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem): // Tries to run fast access code, and if that fails, uses backpatching to replace the code // with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work, @@ -252,20 +252,20 @@ protected: // Store float: X1 Q0 // Load float: X0 // - // If mode == AlwaysUnsafe, the addr argument can be any register. + // If mode == AlwaysFastAccess, the addr argument can be any register. // Otherwise it must be the register listed in the table above. // // Additional scratch registers are used in the following situations: // // emitting_routine && mode == Auto: X2 // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3 - // emitting_routine && mode != AlwaysSafe && !jo.fastmem: X3 - // mode != AlwaysSafe && !jo.fastmem: X2 - // !emitting_routine && mode != AlwaysSafe && !jo.fastmem: X30 + // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3 + // mode != AlwaysSlowAccess && !jo.fastmem: X2 + // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30 // !emitting_routine && mode == Auto && jo.fastmem: X30 // // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push - // may be clobbered if mode != AlwaysUnsafe. + // may be clobbered if mode != AlwaysFastAccess. 
  void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
                            Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
                            BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);

@@ -356,7 +356,7 @@ protected:
   void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg);
   void Force25BitPrecision(Arm64Gen::ARM64Reg output, Arm64Gen::ARM64Reg input);
 
-  //
+  //
   std::map<const u8*, FastmemArea> m_fault_to_handler{};
   Arm64GPRCache gpr;
   Arm64FPRCache fpr;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 2f2add68fc..cb8326439f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -60,16 +60,16 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);
 
   if (m_accurate_cpu_cache_enabled)
-    mode = MemAccessMode::AlwaysSafe;
+    mode = MemAccessMode::AlwaysSlowAccess;
 
-  const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
-  const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
+  const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
+  const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;
 
   bool in_far_code = false;
-  const u8* fastmem_start = GetCodePtr();
-  std::optional<FixupBranch> slowmem_fixup;
+  const u8* fast_access_start = GetCodePtr();
+  std::optional<FixupBranch> slow_access_fixup;
 
-  if (emit_fastmem)
+  if (emit_fast_access)
   {
     ARM64Reg memory_base = MEM_REG;
     ARM64Reg memory_offset = addr;
@@ -84,21 +84,21 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
       LDR(memory_base, MEM_REG, ArithOption(temp, true));
 
-      if (emit_slowmem)
+      if (emit_slow_access)
       {
         FixupBranch pass = CBNZ(memory_base);
-        slowmem_fixup = B();
+        slow_access_fixup = B();
         SetJumpTarget(pass);
       }
 
       AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
     }
-    else if (emit_slowmem && emitting_routine)
+    else if (emit_slow_access && emitting_routine)
     {
       const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
       const ARM64Reg temp2 = ARM64Reg::W2;
 
-      slowmem_fixup = CheckIfSafeAddress(addr, temp1, temp2);
+      slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
     }
 
     if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
@@ -147,27 +147,27 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
     }
   }
-  const u8* fastmem_end = GetCodePtr();
+  const u8* fast_access_end = GetCodePtr();
 
-  if (emit_slowmem)
+  if (emit_slow_access)
   {
     const bool memcheck = jo.memcheck && !emitting_routine;
 
-    if (emit_fastmem)
+    if (emit_fast_access)
     {
       in_far_code = true;
       SwitchToFarCode();
 
       if (jo.fastmem && !emitting_routine)
       {
-        FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
-        fastmem_area->fastmem_code = fastmem_start;
-        fastmem_area->slowmem_code = GetCodePtr();
+        FastmemArea* fastmem_area = &m_fault_to_handler[fast_access_end];
+        fastmem_area->fast_access_code = fast_access_start;
+        fastmem_area->slow_access_code = GetCodePtr();
       }
     }
 
-    if (slowmem_fixup)
-      SetJumpTarget(*slowmem_fixup);
+    if (slow_access_fixup)
+      SetJumpTarget(*slow_access_fixup);
 
     const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0;
     const int temp_gpr_index = DecodeReg(temp_gpr);
@@ -304,7 +304,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 
   if (in_far_code)
   {
-    if (slowmem_fixup)
+    if (slow_access_fixup)
     {
       FixupBranch done = B();
       SwitchToNearCode();
@@ -327,7 +327,7 @@ bool JitArm64::HandleFastmemFault(SContext* ctx)
   if (slow_handler_iter == m_fault_to_handler.end())
     return false;
 
-  const u8* fastmem_area_start = slow_handler_iter->second.fastmem_code;
+  const u8* fastmem_area_start = slow_handler_iter->second.fast_access_code;
   const u8* fastmem_area_end = slow_handler_iter->first;
 
   // no overlapping fastmem area found
@@ -337,7 +337,7 @@ bool JitArm64::HandleFastmemFault(SContext* ctx)
   const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
   ARM64XEmitter emitter(const_cast<u8*>(fastmem_area_start), const_cast<u8*>(fastmem_area_end));
 
-  emitter.BL(slow_handler_iter->second.slowmem_code);
+  emitter.BL(slow_handler_iter->second.slow_access_code);
 
   while (emitter.GetCodePtr() < fastmem_area_end)
     emitter.NOP();
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index e328219cb6..288f532757 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -137,7 +137,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, dest_reg, XA, regs_in_use,
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
                         fprs_in_use);
   }
   else if (mmio_address)
@@ -309,7 +309,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, RS, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use);
   }
   else if (mmio_address)
   {
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index 50fd5edc3a..a0fa812671 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -176,7 +176,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
 
   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr))
   {
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, VD, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use);
   }
   else
   {
@@ -402,12 +402,14 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     else if (m_mmu.IsOptimizableRAMAddress(imm_addr))
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, V0, XA, regs_in_use, fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,
+                           fprs_in_use);
     }
     else
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSafe, V0, XA, regs_in_use, fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use,
+                           fprs_in_use);
     }
   }
   else
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index dcb1b75654..ae4f278b25 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1564,7 +1564,8 @@ void MMU::UpdateBATs(BatTable& bat_table, u32 base_spr) valid_bit |= BAT_WI_BIT; // Enable fastmem mappings for cached memory. There are quirks related to uncached memory - // that fastmem doesn't emulate properly (though no normal games are known to rely on them). + // that can't be correctly emulated by fast accesses, so we don't map uncached memory. + // (No normal games are known to rely on the quirks, though.) if (!wi) { if (m_memory.GetFakeVMEM() && (physical_address & 0xFE000000) == 0x7E000000) @@ -1587,7 +1588,8 @@ void MMU::UpdateBATs(BatTable& bat_table, u32 base_spr) } } - // Fastmem doesn't support memchecks, so disable it for all overlapping virtual pages. + // Fast accesses don't support memchecks, so force slow accesses by removing fastmem + // mappings for all overlapping virtual pages. if (m_power_pc.GetMemChecks().OverlapsMemcheck(virtual_address, BAT_PAGE_SIZE)) valid_bit &= ~BAT_PHYSICAL_BIT;
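
The three terms from the commit message map directly onto the renamed JitArm64 MemAccessMode values above: AlwaysSlowAccess always calls into C++, AlwaysFastAccess emits only the direct-access path, and Auto tries the fast path with a backpatched fallback. As a rough illustration of that split, here is a small stand-alone C++ sketch. All names in it (GuestMemory, FastRead8, SlowRead8, Read8) and the memory layout are invented for this example and are not Dolphin code: in the real JIT a fast access is emitted machine code that indexes the fastmem arena directly rather than a C++ function call, and Auto's fallback is normally installed by the fault handler backpatching in a call to the slow access code (see HandleFastmemFault above) instead of running an explicit per-access check.

#include <cstdint>
#include <cstdio>
#include <vector>

// Invented stand-in for guest memory: a block of "RAM" mapped at guest address
// 0x80000000. The size and address constants are placeholders for illustration.
class GuestMemory
{
public:
  GuestMemory() : m_ram(32 * 1024 * 1024) {}

  // "Fast access": one address computation, no translation, no checks.
  // Only valid when the caller already knows the address is plain RAM
  // (the role IsOptimizableRAMAddress plays before AlwaysFastAccess is chosen);
  // a bad address here means an out-of-bounds access, mirroring the
  // "Dolphin will likely crash" warning on AlwaysFastAccess.
  uint8_t FastRead8(uint32_t guest_addr) const { return m_ram[guest_addr - 0x80000000u]; }

  // "Slow access": full translation in C++, also handles non-RAM addresses.
  uint8_t SlowRead8(uint32_t guest_addr) const
  {
    if (IsPlainRAM(guest_addr))
      return m_ram[guest_addr - 0x80000000u];
    return HandleMMIORead(guest_addr);  // hardware registers, etc.
  }

  // "Auto": use the fast access when the address is known to be safe, otherwise
  // fall back to the slow access. The real fastmem system avoids this explicit
  // check: the fast access simply faults on a bad address, and the fault
  // handler backpatches in a call to the slow access code.
  uint8_t Read8(uint32_t guest_addr) const
  {
    if (IsPlainRAM(guest_addr))
      return FastRead8(guest_addr);
    return SlowRead8(guest_addr);
  }

private:
  bool IsPlainRAM(uint32_t guest_addr) const
  {
    return guest_addr >= 0x80000000u && guest_addr - 0x80000000u < m_ram.size();
  }

  static uint8_t HandleMMIORead(uint32_t guest_addr)
  {
    std::printf("slow access: MMIO read from %08x\n", guest_addr);
    return 0;
  }

  std::vector<uint8_t> m_ram;
};

int main()
{
  GuestMemory mem;
  std::printf("fast access: %d\n", mem.FastRead8(0x80003154));  // address known to be RAM
  std::printf("auto:        %d\n", mem.Read8(0xCC006800));      // not RAM, falls back to slow access
}

The point of keeping the two paths separate is that, for the common case of plain RAM, the generated code pays only the cost of the single indexed load in FastRead8; the slow C++ path and its call overhead are reached only when a fast access faults, or when fastmem mappings are deliberately withheld as in the MMU.cpp changes above (uncached memory, pages overlapping a memcheck).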