Merge pull request #8537 from degasus/fastmem

Core/HW -> PowerPC/JIT: Fastmem arena construction
commit efc1ee8e6a
Connor McLaughlin, 2020-01-14 09:38:15 +10:00 (committed by GitHub)
10 changed files with 100 additions and 33 deletions
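
For context: "fastmem" backs the emulated address space with one large host virtual-memory arena, so JIT-compiled guest loads and stores become a single host access at physical_base + guest_address, with no translation table in the hot path. Before this commit, Memory::Init() always built that arena; afterwards Init() only creates the plain anonymous views, while the fixed-address fastmem mapping moves into Memory::InitFastmemArena(), which the JITs call on demand and which may fail gracefully. A minimal sketch of the core idea (illustrative stand-in, not Dolphin's code):

    #include <cstdint>

    // One huge host reservation: guest physical address X lives at
    // physical_base + X, so the JIT can emit a single indexed load/store.
    static uint8_t* physical_base = nullptr;

    inline uint8_t* GuestToHost(uint32_t guest_addr)
    {
      return physical_base + guest_addr;  // one add; no table, no branch
    }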

View File

@@ -42,6 +42,7 @@ namespace Memory
 // Store the MemArena here
 u8* physical_base = nullptr;
 u8* logical_base = nullptr;
+static bool is_fastmem_arena_initialized = false;

 // The MemArena class
 static Common::MemArena g_arena;
@@ -131,7 +132,7 @@ struct LogicalMemoryView
 // other devices, like the GPU, use other rules, approximated by
 // Memory::GetPointer.) This memory is laid out as follows:
 // [0x00000000, 0x02000000) - 32MB RAM
-// [0x02000000, 0x08000000) - Mirrors of 32MB RAM
+// [0x02000000, 0x08000000) - Mirrors of 32MB RAM (not handled here)
 // [0x08000000, 0x0C000000) - EFB "mapping" (not handled here)
 // [0x0C000000, 0x0E000000) - MMIO etc. (not handled here)
 // [0x10000000, 0x14000000) - 64MB RAM (Wii-only; slightly slower)
@@ -155,6 +156,7 @@ struct LogicalMemoryView
 //
 // TODO: The actual size of RAM is REALRAM_SIZE (24MB); the other 8MB shouldn't
 // be backed by actual memory.
+// TODO: Do we want to handle the mirrors of the GC RAM?
 static PhysicalMemoryRegion physical_regions[] = {
     {&m_pRAM, 0x00000000, RAM_SIZE, PhysicalMemoryRegion::ALWAYS},
     {&m_pL1Cache, 0xE0000000, L1_CACHE_SIZE, PhysicalMemoryRegion::ALWAYS},
@@ -164,7 +166,7 @@ static PhysicalMemoryRegion physical_regions[] = {
 static std::vector<LogicalMemoryView> logical_mapped_entries;

-void Init()
+static u32 GetFlags()
 {
   bool wii = SConfig::GetInstance().bWii;
   bool bMMU = SConfig::GetInstance().bMMU;
@@ -181,6 +183,14 @@ void Init()
     flags |= PhysicalMemoryRegion::WII_ONLY;
   if (bFakeVMEM)
     flags |= PhysicalMemoryRegion::FAKE_VMEM;
+
+  return flags;
+}
+
+void Init()
+{
+  bool wii = SConfig::GetInstance().bWii;
+  u32 flags = GetFlags();
   u32 mem_size = 0;
   for (PhysicalMemoryRegion& region : physical_regions)
   {
@@ -190,15 +200,14 @@ void Init()
     mem_size += region.size;
   }
   g_arena.GrabSHMSegment(mem_size);
-  physical_base = Common::MemArena::FindMemoryBase();

+  // Create an anonymous view of the physical memory
   for (PhysicalMemoryRegion& region : physical_regions)
   {
     if ((flags & region.flags) != region.flags)
       continue;

-    u8* base = physical_base + region.physical_address;
-    *region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size, base);
+    *region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size);

     if (!*region.out_pointer)
     {
@@ -207,10 +216,6 @@ void Init()
     }
   }

-#ifndef _ARCH_32
-  logical_base = physical_base + 0x200000000;
-#endif
-
   if (wii)
     mmio_mapping = InitMMIOWii();
   else
@@ -222,8 +227,41 @@ void Init()
   m_IsInitialized = true;
 }

+bool InitFastmemArena()
+{
+  u32 flags = GetFlags();
+  physical_base = Common::MemArena::FindMemoryBase();
+
+  if (!physical_base)
+    return false;
+
+  for (PhysicalMemoryRegion& region : physical_regions)
+  {
+    if ((flags & region.flags) != region.flags)
+      continue;
+
+    u8* base = physical_base + region.physical_address;
+    u8* view = (u8*)g_arena.CreateView(region.shm_position, region.size, base);
+
+    if (base != view)
+    {
+      return false;
+    }
+  }
+
+#ifndef _ARCH_32
+  logical_base = physical_base + 0x200000000;
+#endif
+
+  is_fastmem_arena_initialized = true;
+  return true;
+}
+
 void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
 {
+  if (!is_fastmem_arena_initialized)
+    return;
+
   for (auto& entry : logical_mapped_entries)
   {
     g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
@@ -279,12 +317,10 @@ void DoState(PointerWrap& p)
 void Shutdown()
 {
+  ShutdownFastmemArena();
+
   m_IsInitialized = false;
-  u32 flags = 0;
-  if (SConfig::GetInstance().bWii)
-    flags |= PhysicalMemoryRegion::WII_ONLY;
-  if (m_pFakeVMEM)
-    flags |= PhysicalMemoryRegion::FAKE_VMEM;
+  u32 flags = GetFlags();
   for (PhysicalMemoryRegion& region : physical_regions)
   {
     if ((flags & region.flags) != region.flags)
@@ -292,16 +328,36 @@ void Shutdown()
     g_arena.ReleaseView(*region.out_pointer, region.size);
     *region.out_pointer = nullptr;
   }
+  g_arena.ReleaseSHMSegment();
+  mmio_mapping.reset();
+  INFO_LOG(MEMMAP, "Memory system shut down.");
+}
+
+void ShutdownFastmemArena()
+{
+  if (!is_fastmem_arena_initialized)
+    return;
+
+  u32 flags = GetFlags();
+  for (PhysicalMemoryRegion& region : physical_regions)
+  {
+    if ((flags & region.flags) != region.flags)
+      continue;
+
+    u8* base = physical_base + region.physical_address;
+    g_arena.ReleaseView(base, region.size);
+  }
+
   for (auto& entry : logical_mapped_entries)
   {
     g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
   }
   logical_mapped_entries.clear();
-  g_arena.ReleaseSHMSegment();
+
   physical_base = nullptr;
   logical_base = nullptr;
-  mmio_mapping.reset();
-  INFO_LOG(MEMMAP, "Memory system shut down.");
+
+  is_fastmem_arena_initialized = false;
 }

 void Clear()
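
With this split, Init() only grabs the shared-memory segment and creates anonymous (placed-anywhere) views, while InitFastmemArena() attempts the fixed-address mapping and reports failure instead of aborting; note how it compares the returned view against the requested base. A hedged POSIX sketch of the two-step scheme Common::MemArena implements (sizes and flags here are illustrative, and the real code differs per platform):

    #include <sys/mman.h>
    #include <cstddef>

    int main()
    {
      const size_t kArenaSize = 0x4'0000'0000;  // illustrative 16 GiB reservation
      // Step 1, akin to MemArena::FindMemoryBase(): reserve contiguous
      // address space without committing physical memory.
      void* base = mmap(nullptr, kArenaSize, PROT_NONE,
                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
      if (base == MAP_FAILED)
        return 1;  // the failure InitFastmemArena() now surfaces as `false`
      // Step 2, akin to CreateView() with a requested base: map the shared
      // segment at fixed offsets inside the reservation (shm_open/ftruncate
      // plus mmap with MAP_FIXED would go here), making each guest physical
      // region a plain offset from `base`.
      munmap(base, kArenaSize);
      return 0;
    }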

View File

@@ -62,6 +62,8 @@ extern std::unique_ptr<MMIO::Mapping> mmio_mapping;
 bool IsInitialized();
 void Init();
 void Shutdown();
+bool InitFastmemArena();
+void ShutdownFastmemArena();
 void DoState(PointerWrap& p);
 void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table);

View File

@@ -333,6 +333,7 @@ void Jit64::Init()
   InitializeInstructionTables();
   EnableBlockLink();

+  jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
   jo.optimizeGatherPipe = true;
   jo.accurateSinglePrecision = true;
   UpdateMemoryOptions();
@@ -393,6 +394,8 @@ void Jit64::Shutdown()
   FreeStack();
   FreeCodeSpace();

+  Memory::ShutdownFastmemArena();
+
   blocks.Shutdown();
   m_far_code.Shutdown();
   m_const_pool.Shutdown();
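
Two details of the call site are worth noting: the && short-circuit means no arena is constructed at all when fastmem is disabled in the config, and jo.fastmem_arena records whether the mapping actually succeeded rather than merely being requested. The matching Memory::ShutdownFastmemArena() call is safe even when initialization failed, since it returns early unless is_fastmem_arena_initialized is set. A tiny runnable stand-in for that contract (hypothetical names, not Dolphin's API):

    #include <cstdio>

    // Pretend the fixed-address mapping failed (e.g. address space exhausted).
    static bool TryInitArena() { return false; }

    int main()
    {
      const bool config_fastmem = true;  // stands in for SConfig's bFastmem
      // Short-circuit: TryInitArena() is skipped entirely when fastmem is
      // off, and a failed mapping leaves the flag false instead of aborting.
      const bool fastmem_arena = config_fastmem && TryInitArena();
      std::printf("fastmem_arena = %d\n", fastmem_arena);
      return 0;
    }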

View File

@@ -304,7 +304,9 @@ void Jit64::dcbz(UGeckoInstruction inst)
     AND(32, R(RSCRATCH), Imm32(~31));
   }

-  if (MSR.DR)
+  bool emit_fast_path = MSR.DR && m_jit.jo.fastmem_arena;
+
+  if (emit_fast_path)
   {
     // Perform lookup to see if we can use fast path.
     MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::dbat_table[0]));
@@ -329,7 +331,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
   ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH);
   ABI_PopRegistersAndAdjustStack(registersInUse, 0);

-  if (MSR.DR)
+  if (emit_fast_path)
   {
     FixupBranch end = J(true);
     SwitchToNearCode();
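
dcbz zeroes one 32-byte cache line, and the inline fast path (a BAT lookup followed by a direct store through the arena) is only legal when that arena exists, so the gate becomes MSR.DR && jo.fastmem_arena. A scalar stand-in for the decision the emitted code encodes (illustrative C++, not the x86 the JIT emits):

    #include <cstdint>
    #include <cstring>

    constexpr uint32_t kCacheLineSize = 32;   // dcbz clears one 32-byte line
    static uint8_t* physical_base = nullptr;  // valid only with a fastmem arena

    static void ClearCacheLineSlow(uint32_t addr)
    {
      (void)addr;  // stands in for the MMU-aware PowerPC::ClearCacheLine path
    }

    void EmulatedDcbz(uint32_t effective_addr, bool msr_dr, bool fastmem_arena)
    {
      const uint32_t line = effective_addr & ~(kCacheLineSize - 1);
      const bool fast_path = msr_dr && fastmem_arena;  // the new gate
      if (fast_path)
        std::memset(physical_base + line, 0, kCacheLineSize);  // direct store
      else
        ClearCacheLineSlow(line);  // always-correct fallback
    }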

View File

@@ -371,7 +371,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
   FixupBranch exit;
   const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR;
-  const bool fast_check_address = !slowmem && dr_set;
+  const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena;
   if (fast_check_address)
   {
     FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse);
@@ -435,7 +435,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize,
                                           BitSet32 registersInUse, bool signExtend)
 {
   // If the address is known to be RAM, just load it directly.
-  if (PowerPC::IsOptimizableRAMAddress(address))
+  if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address))
   {
     UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
     return;
@@ -539,7 +539,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize,
   FixupBranch exit;
   const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR;
-  const bool fast_check_address = !slowmem && dr_set;
+  const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena;
   if (fast_check_address)
   {
     FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse);
@@ -641,7 +641,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
     m_jit.js.fifoBytesSinceCheck += accessSize >> 3;
     return false;
   }
-  else if (PowerPC::IsOptimizableRAMAddress(address))
+  else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address))
   {
     WriteToConstRamAddress(accessSize, arg, address);
     return false;
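
All four hunks in this file apply the same rule: the inline "fast check" in SafeLoadToReg/SafeWriteRegToReg that lets the common case bypass the safe helper, and the constant-address optimization that turns a load/store of a statically known RAM address into a direct host access, both dereference the arena, so both now also require jo.fastmem_arena. The shared predicate, as an illustrative restatement:

    // Illustrative restatement of the gate added at the call sites above.
    bool UseFastCheckAddress(bool slowmem_requested, bool dr_set,
                             bool fastmem_arena)
    {
      // Without the arena there is no host mapping to dereference directly,
      // so every access must go through the safe (helper-call) path.
      return !slowmem_requested && dr_set && fastmem_arena;
    }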

View File

@@ -489,6 +489,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
   int size = sizes[type] * (single ? 1 : 2);
   bool isInline = quantize != -1;
+  bool safe_access = m_jit.jo.memcheck || !m_jit.jo.fastmem;

   // illegal
   if (type == QUANTIZE_INVALID1 || type == QUANTIZE_INVALID2 || type == QUANTIZE_INVALID3)
@@ -506,7 +507,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
   bool extend = single && (type == QUANTIZE_S8 || type == QUANTIZE_S16);

-  if (m_jit.jo.memcheck)
+  if (safe_access)
   {
     BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE_LOAD;
     int flags = isInline ? 0 :
@@ -632,8 +633,9 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
 {
   int size = single ? 32 : 64;
   bool extend = false;
+  bool safe_access = m_jit.jo.memcheck || !m_jit.jo.fastmem;

-  if (m_jit.jo.memcheck)
+  if (safe_access)
   {
     BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE;
     int flags = isInline ? 0 :
@@ -644,7 +646,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
   if (single)
   {
-    if (m_jit.jo.memcheck)
+    if (safe_access)
     {
       MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
     }
@@ -669,7 +671,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
   // for a good reason, or merely because no game does this.
   // If we find something that actually does do this, maybe this should be changed. How
   // much of a performance hit would it be?
-  if (m_jit.jo.memcheck)
+  if (safe_access)
   {
     ROL(64, R(RSCRATCH_EXTRA), Imm8(32));
     MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA));
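
The quantized (paired-single) load routines previously chose the checked path on jo.memcheck alone; they now compute safe_access = memcheck || !fastmem, because without fastmem backing the unchecked inline access has nothing valid to dereference. A self-contained stand-in for the resulting shape (hypothetical helpers, not Dolphin's):

    #include <cstdint>

    static uint8_t* physical_base = nullptr;             // fastmem arena base
    static uint32_t SafeLoad32(uint32_t) { return 0; }   // stub: checked path

    uint32_t QuantizedLoadWord(uint32_t addr, bool memcheck, bool fastmem)
    {
      const bool safe_access = memcheck || !fastmem;  // the new shared gate
      if (safe_access)
        return SafeLoad32(addr);  // out-of-line helper: MMIO, faults, checks
      // fastmem path: raw big-endian read straight through the arena
      const uint8_t* p = physical_base + addr;
      return (uint32_t{p[0]} << 24) | (uint32_t{p[1]} << 16) |
             (uint32_t{p[2]} << 8) | uint32_t{p[3]};
    }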

View File

@@ -49,6 +49,8 @@ void JitArm64::Init()
   size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE;
   AllocCodeSpace(CODE_SIZE + child_code_size);
   AddChildCodeSpace(&farcode, child_code_size);
+
+  jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
   jo.enableBlocklink = true;
   jo.optimizeGatherPipe = true;
   UpdateMemoryOptions();
@@ -133,6 +135,7 @@ void JitArm64::ClearCache()
 void JitArm64::Shutdown()
 {
+  Memory::ShutdownFastmemArena();
   FreeCodeSpace();
   blocks.Shutdown();
   FreeStack();

View File

@@ -45,6 +45,6 @@ bool JitBase::CanMergeNextInstructions(int count) const
 void JitBase::UpdateMemoryOptions()
 {
   bool any_watchpoints = PowerPC::memchecks.HasAny();
-  jo.fastmem = SConfig::GetInstance().bFastmem && (MSR.DR || !any_watchpoints);
+  jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena && (MSR.DR || !any_watchpoints);
   jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints;
 }
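
This one-line change is the keystone: it establishes the invariant that jo.fastmem can only be true when jo.fastmem_arena is, so every fastmem-only code path in the backends is transitively disabled when arena construction failed. A minimal runnable check of that invariant (illustrative stand-in, not Dolphin's types):

    #include <cassert>

    struct Options
    {
      bool fastmem_arena = false;  // set once at JIT init
      bool fastmem = false;        // recomputed as emulated state changes
    };

    // Mirrors the updated UpdateMemoryOptions() above.
    void UpdateMemoryOptions(Options& jo, bool config_fastmem, bool dr,
                             bool any_watchpoints)
    {
      jo.fastmem = config_fastmem && jo.fastmem_arena && (dr || !any_watchpoints);
    }

    int main()
    {
      Options jo;  // arena construction failed: fastmem_arena stays false
      UpdateMemoryOptions(jo, /*config_fastmem=*/true, /*dr=*/true,
                          /*any_watchpoints=*/false);
      assert(!jo.fastmem);      // fastmem cannot turn on without the arena
      jo.fastmem_arena = true;  // as if InitFastmemArena() had succeeded
      UpdateMemoryOptions(jo, true, true, false);
      assert(jo.fastmem);       // now the fast paths are allowed
      return 0;
    }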

View File

@@ -48,6 +48,7 @@ protected:
     bool optimizeGatherPipe;
     bool accurateSinglePrecision;
     bool fastmem;
+    bool fastmem_arena;
     bool memcheck;
     bool profile_blocks;
   };

View File

@@ -1117,13 +1117,12 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XCheckTLBFlag flag)
   for (int i = 0; i < 8; i++, pteg_addr += 8)
   {
-    u32 pteg;
-    std::memcpy(&pteg, &Memory::physical_base[pteg_addr], sizeof(u32));
+    u32 pteg = Common::swap32(Memory::Read_U32(pteg_addr));

     if (pte1 == pteg)
     {
       UPTE2 PTE2;
-      PTE2.Hex = Common::swap32(&Memory::physical_base[pteg_addr + 4]);
+      PTE2.Hex = Memory::Read_U32(pteg_addr + 4);

       // set the access bits
       switch (flag)
@@ -1145,8 +1144,7 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XCheckTLBFlag flag)
   if (!IsNoExceptionFlag(flag))
   {
-    const u32 swapped_pte2 = Common::swap32(PTE2.Hex);
-    std::memcpy(&Memory::physical_base[pteg_addr + 4], &swapped_pte2, sizeof(u32));
+    Memory::Write_U32(PTE2.Hex, pteg_addr + 4);
   }

   // We already updated the TLB entry if this was caused by a C bit.
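
The page-table walk used to poke Memory::physical_base directly, which only exists once the fastmem arena is mapped; going through Memory::Read_U32/Write_U32 keeps the software MMU working when InitFastmemArena() failed or was never called. The extra Common::swap32 preserves behavior: Read_U32 returns a host-order value, and swapping it back restores the raw big-endian word in which pte1 is compared. A self-contained sketch of that double swap (stand-in helpers, little-endian host assumed):

    #include <cstdint>
    #include <cstring>

    static uint8_t guest_ram[64];  // stand-in: guest memory holds big-endian words

    static uint32_t Read_U32(uint32_t addr)  // returns a host-order value
    {
      uint32_t be;
      std::memcpy(&be, &guest_ram[addr], sizeof(be));
      return __builtin_bswap32(be);  // GCC/Clang builtin; BE to host order
    }

    bool PtegMatches(uint32_t pteg_addr, uint32_t pte1_be)
    {
      // swap32(Read_U32(x)) round-trips back to the raw big-endian word,
      // matching what the old memcpy from physical_base produced.
      const uint32_t pteg = __builtin_bswap32(Read_U32(pteg_addr));
      return pteg == pte1_be;
    }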