Merge pull request #11440 from JosJuice/jit64-negative-guard

Core: Allocate 2 GiB of guard pages below fastmem area
This commit is contained in:
JMC47 2023-01-14 19:19:53 -05:00 committed by GitHub
commit 30f0051f9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 42 additions and 21 deletions

View File

@@ -104,13 +104,8 @@ void MemoryManager::Init()
   const bool wii = SConfig::GetInstance().bWii;
   const bool mmu = Core::System::GetInstance().IsMMUMode();
-  bool fake_vmem = false;
-#ifndef _ARCH_32
-  // If MMU is turned off in GameCube mode, turn on fake VMEM hack.
-  // The fake VMEM hack's address space is above the memory space that we
-  // allocate on 32bit targets, so disable it there.
-  fake_vmem = !wii && !mmu;
-#endif
+  // If MMU is turned off in GameCube mode, turn on fake VMEM hack.
+  const bool fake_vmem = !wii && !mmu;
   u32 mem_size = 0;
   for (PhysicalMemoryRegion& region : m_physical_regions)
@@ -164,19 +159,51 @@ void MemoryManager::Init()
 bool MemoryManager::InitFastmemArena()
 {
-#if _ARCH_32
-  const size_t memory_size = 0x31000000;
-#else
-  const size_t memory_size = 0x400000000;
-#endif
-  m_physical_base = m_arena.ReserveMemoryRegion(memory_size);
+  // Here we set up memory mappings for fastmem. The basic idea of fastmem is that we reserve 4 GiB
+  // of virtual memory and lay out the addresses within that 4 GiB range just like the memory map of
+  // the emulated system. This lets the JIT emulate PPC load/store instructions by translating a PPC
+  // address to a host address as follows and then using a regular load/store instruction:
+  //
+  // RMEM = ppcState.msr.DR ? m_logical_base : m_physical_base
+  // host_address = RMEM + u32(ppc_address_base + ppc_address_offset)
+  //
+  // If the resulting host address is backed by real memory, the memory access will simply work.
+  // If not, a segfault handler will backpatch the JIT code to instead call functions in MMU.cpp.
+  // This way, most memory accesses will be super fast. We do pay a performance penalty for memory
+  // accesses that need special handling, but they're rare enough that it's very beneficial overall.
+  //
+  // Note: Jit64 (but not JitArm64) sometimes takes a shortcut when computing addresses and skips
+  // the cast to u32 that you see in the pseudocode above. When this happens, ppc_address_base
+  // is a 32-bit value stored in a 64-bit register (which effectively makes it behave like an
+  // unsigned 32-bit value), and ppc_address_offset is a signed 32-bit integer encoded directly
+  // into the load/store instruction. This can cause us to undershoot or overshoot the intended
+  // 4 GiB range by at most 2 GiB in either direction. So, make sure we have 2 GiB of guard pages
+  // on each side of each 4 GiB range.
+  //
+  // We need two 4 GiB ranges, one for PPC addresses with address translation disabled
+  // (m_physical_base) and one for PPC addresses with address translation enabled (m_logical_base),
+  // so our memory map ends up looking like this:
+  //
+  // 2 GiB guard
+  // 4 GiB view for disabled address translation
+  // 2 GiB guard
+  // 4 GiB view for enabled address translation
+  // 2 GiB guard
-  if (!m_physical_base)
+  constexpr size_t ppc_view_size = 0x1'0000'0000;
+  constexpr size_t guard_size = 0x8000'0000;
+  constexpr size_t memory_size = ppc_view_size * 2 + guard_size * 3;
+  u8* fastmem_arena = m_arena.ReserveMemoryRegion(memory_size);
+  if (!fastmem_arena)
   {
     PanicAlertFmt("Memory::InitFastmemArena(): Failed finding a memory base.");
     return false;
   }
+  m_physical_base = fastmem_arena + guard_size;
+  m_logical_base = fastmem_arena + ppc_view_size + guard_size * 2;
   for (const PhysicalMemoryRegion& region : m_physical_regions)
   {
     if (!region.active)
@@ -194,10 +221,6 @@ bool MemoryManager::InitFastmemArena()
     }
   }
-#ifndef _ARCH_32
-  m_logical_base = m_physical_base + 0x200000000;
-#endif
   m_is_fastmem_arena_initialized = true;
   return true;
 }

View File

@@ -218,8 +218,8 @@ private:
   // with address translation turned on. This mapping is computed based
   // on the BAT registers.
   //
-  // Each of these 4GB regions is followed by 4GB of empty space so overflows
-  // in address computation in the JIT don't access the wrong memory.
+  // Each of these 4GB regions is surrounded by 2GB of empty space so overflows
+  // in address computation in the JIT don't access unrelated memory.
   //
   // The neighboring mirrors of RAM ([0x02000000, 0x08000000), etc.) exist because
   // the bus masks off the bits in question for RAM accesses; using them is a
@@ -227,8 +227,6 @@ private:
   // few buggy games (notably Rogue Squadron 2) use them by accident. They
   // aren't backed by memory mappings because they are used very rarely.
   //
-  // Dolphin doesn't emulate the difference between cached and uncached access.
-  //
   // TODO: The actual size of RAM is 24MB; the other 8MB shouldn't be backed by actual memory.
   // TODO: Do we want to handle the mirrors of the GC RAM?
   std::array<PhysicalMemoryRegion, 4> m_physical_regions{};