Merge pull request #4146 from degasus/dynamic-bat

Dynamic bat
This commit is contained in:
Markus Wick 2016-09-06 09:05:02 +02:00 committed by GitHub
commit dddd88aace
20 changed files with 688 additions and 609 deletions

View File

@ -157,118 +157,3 @@ u8* MemArena::FindMemoryBase()
return static_cast<u8*>(base);
#endif
}
// yeah, this could also be done in like two bitwise ops...
// Skips (via `continue` in the caller's enclosing loop) any view whose
// required feature flags (MV_WII_ONLY / MV_FAKE_VMEM) are not present in
// the requested a_flags. Deliberately NOT wrapped in do { } while (0):
// the `continue` statements must act on the loop at the expansion site.
#define SKIP(a_flags, b_flags) \
if (!(a_flags & MV_WII_ONLY) && (b_flags & MV_WII_ONLY)) \
continue; \
if (!(a_flags & MV_FAKE_VMEM) && (b_flags & MV_FAKE_VMEM)) \
continue;
// Attempts to map every (non-skipped) view at the given host base address.
// On 64-bit, each mirror view gets its own real mapping of the same SHM
// offset; on 32-bit there is not enough address space, so mirror views
// alias the previous view's pointer and software mirroring is used instead.
// Returns false (after releasing everything mapped so far) if any single
// mapping fails, so the caller could retry at a different base.
static bool Memory_TryBase(u8* base, MemoryView* views, int num_views, u32 flags, MemArena* arena)
{
// OK, we know where to find free space. Now grab it!
// We just mimic the popular BAT setup.
int i;
for (i = 0; i < num_views; i++)
{
MemoryView* view = &views[i];
void* view_base;
bool use_sw_mirror;
SKIP(flags, view->flags);
#if _ARCH_64
// On 64-bit, we map the same file position multiple times, so we
// don't need the software fallback for the mirrors.
view_base = base + view->virtual_address;
use_sw_mirror = false;
#else
// On 32-bit, we don't have the actual address space to store all
// the mirrors, so we just map the fallbacks somewhere in our address
// space and use the software fallbacks for mirroring.
view_base = base + (view->virtual_address & 0x3FFFFFFF);
use_sw_mirror = true;
#endif
if (use_sw_mirror && (view->flags & MV_MIRROR_PREVIOUS))
{
// Alias the previous view's pointer rather than creating a second
// mapping; mapped_ptr stays null so it won't be double-released.
view->view_ptr = views[i - 1].view_ptr;
}
else
{
view->mapped_ptr = arena->CreateView(view->shm_position, view->size, view_base);
view->view_ptr = view->mapped_ptr;
}
if (!view->view_ptr)
{
// Argh! ERROR! Free what we grabbed so far so we can try again.
// i + 1 so the current (partially set up) view is included.
MemoryMap_Shutdown(views, i + 1, flags, arena);
return false;
}
if (view->out_ptr)
*(view->out_ptr) = (u8*)view->view_ptr;
}
return true;
}
// Assigns each active view its offset inside the shared-memory segment and
// clears any stale mapping pointers. Mirror views reuse the offset of the
// view laid out immediately before them. Returns the total SHM size needed.
static u32 MemoryMap_InitializeViews(MemoryView* views, int num_views, u32 flags)
{
  u32 position = 0;
  u32 previous_position = 0;
  for (int i = 0; i < num_views; i++)
  {
    MemoryView& view = views[i];
    // Make sure no mapping pointer survives from an earlier setup.
    view.mapped_ptr = nullptr;
    SKIP(flags, view.flags);
    // Mirrors share backing storage with the previously laid-out view.
    if (view.flags & MV_MIRROR_PREVIOUS)
      position = previous_position;
    view.shm_position = position;
    previous_position = position;
    position += view.size;
  }
  return position;
}
// Uses the arena to lay out and map every view described in `views`.
// Returns the base host address of the mapping. Failure is fatal: the
// emulator cannot run without its memory map, so the process exits.
u8* MemoryMap_Setup(MemoryView* views, int num_views, u32 flags, MemArena* arena)
{
  u32 total_mem = MemoryMap_InitializeViews(views, num_views, flags);
  arena->GrabSHMSegment(total_mem);
  // Now, create views in high memory where there's plenty of space.
  u8* base = MemArena::FindMemoryBase();
  // This really shouldn't fail - in 64-bit, there will always be enough
  // address space.
  if (!Memory_TryBase(base, views, num_views, flags, arena))
  {
    PanicAlert("MemoryMap_Setup: Failed finding a memory base.");
    // Use a nonzero exit status: exit(0) would signal success to the
    // parent process even though this is a fatal error.
    exit(1);
    return nullptr;  // Unreachable; kept for compilers lacking noreturn analysis.
  }
  return base;
}
// Releases every view mapping created by MemoryMap_Setup. Mirror views can
// share a mapping pointer, so released pointers are remembered to avoid
// unmapping the same region twice. (`flags` is accepted for symmetry with
// MemoryMap_Setup but is not needed here.)
void MemoryMap_Shutdown(MemoryView* views, int num_views, u32 flags, MemArena* arena)
{
  std::set<void*> already_released;
  for (int i = 0; i < num_views; i++)
  {
    MemoryView& view = views[i];
    if (!view.mapped_ptr)
      continue;
    if (already_released.count(view.mapped_ptr) == 0)
    {
      arena->ReleaseView(view.mapped_ptr, view.size);
      already_released.insert(view.mapped_ptr);
      view.mapped_ptr = nullptr;
    }
  }
}

View File

@ -35,26 +35,3 @@ private:
int fd;
#endif
};
// Flags controlling when a MemoryView is active and how it is laid out.
enum
{
// View shares backing storage with the view listed before it.
MV_MIRROR_PREVIOUS = 1,
// View only exists when fake-VMEM emulation is enabled.
MV_FAKE_VMEM = 2,
// View only exists when emulating a Wii.
MV_WII_ONLY = 4,
};
// One region of the emulated address space to be mapped from the arena.
struct MemoryView
{
// If non-null, receives the host pointer once the view is mapped.
u8** out_ptr;
// Offset from the mapping base at which this view should appear.
u64 virtual_address;
u32 size;
// Combination of the MV_* flags above.
u32 flags;
// Host mapping created for this view; may stay null for mirrored views.
void* mapped_ptr;
// Pointer to use for accesses; may alias a previous view's mapping.
void* view_ptr;
// Offset of this view's backing storage inside the SHM segment.
u32 shm_position;
};
// Uses a memory arena to set up an emulator-friendly memory map according to
// a passed-in list of MemoryView structures.
u8* MemoryMap_Setup(MemoryView* views, int num_views, u32 flags, MemArena* arena);
void MemoryMap_Shutdown(MemoryView* views, int num_views, u32 flags, MemArena* arena);

View File

@ -237,6 +237,8 @@ bool CBoot::Load_BS2(const std::string& _rBootROMFilename)
PowerPC::ppcState.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::ppcState.spr[SPR_DBAT3U] = 0xfff0001f;
PowerPC::ppcState.spr[SPR_DBAT3L] = 0xfff00001;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
PC = 0x81200150;
return true;
}
@ -377,6 +379,8 @@ bool CBoot::BootUp()
PowerPC::ppcState.spr[SPR_DBAT4L] = 0x10000002;
PowerPC::ppcState.spr[SPR_DBAT5U] = 0xd0001fff;
PowerPC::ppcState.spr[SPR_DBAT5L] = 0x1000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
dolLoader.Load();
PC = dolLoader.GetEntryPoint();

View File

@ -52,6 +52,8 @@ bool CBoot::EmulatedBS2_GC(bool skipAppLoader)
PowerPC::ppcState.spr[SPR_DBAT0L] = 0x00000002;
PowerPC::ppcState.spr[SPR_DBAT1U] = 0xc0001fff;
PowerPC::ppcState.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
// Write necessary values
// Here we write values to memory that the apploader does not take care of. Game info goes
@ -298,11 +300,11 @@ bool CBoot::SetupWiiMemory(DiscIO::Country country)
Memory::Write_U32(0x8179d500, 0x00003110); // Init
Memory::Write_U32(0x04000000, 0x00003118); // Unknown
Memory::Write_U32(0x04000000, 0x0000311c); // BAT
Memory::Write_U32(0x93400000, 0x00003120); // BAT
Memory::Write_U32(0x93600000, 0x00003120); // BAT
Memory::Write_U32(0x90000800, 0x00003124); // Init - MEM2 low
Memory::Write_U32(0x93ae0000, 0x00003128); // Init - MEM2 high
Memory::Write_U32(0x93ae0000, 0x00003130); // IOS MEM2 low
Memory::Write_U32(0x93b00000, 0x00003134); // IOS MEM2 high
Memory::Write_U32(0x935e0000, 0x00003128); // Init - MEM2 high
Memory::Write_U32(0x935e0000, 0x00003130); // IOS MEM2 low
Memory::Write_U32(0x93600000, 0x00003134); // IOS MEM2 high
Memory::Write_U32(0x00000012, 0x00003138); // Console type
// 40 is copied from 88 after running apploader
Memory::Write_U32(0x00090204, 0x00003140); // IOS revision (IOS9, v2.4)
@ -371,6 +373,8 @@ bool CBoot::EmulatedBS2_Wii()
PowerPC::ppcState.spr[SPR_DBAT4L] = 0x10000002;
PowerPC::ppcState.spr[SPR_DBAT5U] = 0xd0001fff;
PowerPC::ppcState.spr[SPR_DBAT5L] = 0x1000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
Memory::Write_U32(0x4c000064, 0x00000300); // Write default DSI Handler: rfi
Memory::Write_U32(0x4c000064, 0x00000800); // Write default FPU Handler: rfi

View File

@ -85,6 +85,8 @@ bool CBoot::Boot_ELF(const std::string& filename)
PowerPC::ppcState.spr[SPR_DBAT4L] = 0x10000002;
PowerPC::ppcState.spr[SPR_DBAT5U] = 0xd0001fff;
PowerPC::ppcState.spr[SPR_DBAT5L] = 0x1000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
if (!reader.LoadSymbols())
{

View File

@ -410,6 +410,8 @@ void FifoPlayer::LoadMemory()
PowerPC::ppcState.spr[SPR_DBAT0L] = 0x00000002;
PowerPC::ppcState.spr[SPR_DBAT1U] = 0xc0001fff;
PowerPC::ppcState.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
SetupFifo();

View File

@ -96,6 +96,26 @@ bool IsInitialized()
return m_IsInitialized;
}
// Describes one physically-addressed memory region backed by the shared
// memory segment (e.g. main RAM, locked L1 cache, fake VMEM, Wii EXRAM).
struct PhysicalMemoryRegion
{
// Receives the host pointer for this region once it is mapped.
u8** out_pointer;
// Base of the region in emulated physical address space.
u32 physical_address;
u32 size;
// Conditions under which this region exists at all.
enum
{
ALWAYS = 0,
FAKE_VMEM = 1,
WII_ONLY = 2,
} flags;
// Offset of this region inside the shared-memory segment.
u32 shm_position;
};
// One live host mapping in the logical (BAT-translated) address space,
// recorded so it can be released when the BAT configuration changes.
struct LogicalMemoryView
{
void* mapped_pointer;
u32 mapped_size;
};
// Dolphin allocates memory to represent four regions:
// - 32MB RAM (actually 24MB on hardware), available on Gamecube and Wii
// - 64MB "EXRAM", RAM only available on Wii
@ -116,28 +136,12 @@ bool IsInitialized()
// [0x08000000, 0x0C000000) - EFB "mapping" (not handled here)
// [0x0C000000, 0x0E000000) - MMIO etc. (not handled here)
// [0x10000000, 0x14000000) - 64MB RAM (Wii-only; slightly slower)
//
// The 4GB starting at logical_base represents access from the CPU
// with address translation turned on. Instead of changing the mapping
// based on the BAT registers, we approximate the common BAT configuration
// used by games:
// [0x00000000, 0x02000000) - 32MB RAM, cached access, normally only mapped
// during startup by Wii WADs
// [0x02000000, 0x08000000) - Mirrors of 32MB RAM (not implemented here)
// [0x40000000, 0x50000000) - FakeVMEM
// [0x70000000, 0x80000000) - FakeVMEM
// [0x80000000, 0x82000000) - 32MB RAM, cached access
// [0x82000000, 0x88000000) - Mirrors of 32MB RAM (not implemented here)
// [0x90000000, 0x94000000) - 64MB RAM, Wii-only, cached access
// [0xC0000000, 0xC2000000) - 32MB RAM, uncached access
// [0xC2000000, 0xC8000000) - Mirrors of 32MB RAM (not implemented here)
// [0xC8000000, 0xCC000000) - EFB "mapping" (not handled here)
// [0xCC000000, 0xCE000000) - MMIO etc. (not handled here)
// [0xD0000000, 0xD4000000) - 64MB RAM, Wii-only, uncached access
// [0x7E000000, 0x80000000) - FakeVMEM
// [0xE0000000, 0xE0040000) - 256KB locked L1
//
// TODO: We shouldn't hardcode this mapping; we can generate it dynamically
// based on the BAT registers.
// The 4GB starting at logical_base represents access from the CPU
// with address translation turned on. This mapping is computed based
// on the BAT registers.
//
// Each of these 4GB regions is followed by 4GB of empty space so overflows
// in address computation in the JIT don't access the wrong memory.
@ -152,18 +156,14 @@ bool IsInitialized()
//
// TODO: The actual size of RAM is REALRAM_SIZE (24MB); the other 8MB shouldn't
// be backed by actual memory.
static MemoryView views[] = {
{&m_pRAM, 0x00000000, RAM_SIZE, 0},
{nullptr, 0x200000000, RAM_SIZE, MV_MIRROR_PREVIOUS},
{nullptr, 0x280000000, RAM_SIZE, MV_MIRROR_PREVIOUS},
{nullptr, 0x2C0000000, RAM_SIZE, MV_MIRROR_PREVIOUS},
{&m_pL1Cache, 0x2E0000000, L1_CACHE_SIZE, 0},
{&m_pFakeVMEM, 0x27E000000, FAKEVMEM_SIZE, MV_FAKE_VMEM},
{&m_pEXRAM, 0x10000000, EXRAM_SIZE, MV_WII_ONLY},
{nullptr, 0x290000000, EXRAM_SIZE, MV_WII_ONLY | MV_MIRROR_PREVIOUS},
{nullptr, 0x2D0000000, EXRAM_SIZE, MV_WII_ONLY | MV_MIRROR_PREVIOUS},
static PhysicalMemoryRegion physical_regions[] = {
{&m_pRAM, 0x00000000, RAM_SIZE, PhysicalMemoryRegion::ALWAYS},
{&m_pL1Cache, 0xE0000000, L1_CACHE_SIZE, PhysicalMemoryRegion::ALWAYS},
{&m_pFakeVMEM, 0x7E000000, FAKEVMEM_SIZE, PhysicalMemoryRegion::FAKE_VMEM},
{&m_pEXRAM, 0x10000000, EXRAM_SIZE, PhysicalMemoryRegion::WII_ONLY},
};
static const int num_views = sizeof(views) / sizeof(MemoryView);
static std::vector<LogicalMemoryView> logical_mapped_entries;
void Init()
{
@ -178,10 +178,35 @@ void Init()
u32 flags = 0;
if (wii)
flags |= MV_WII_ONLY;
flags |= PhysicalMemoryRegion::WII_ONLY;
if (bFakeVMEM)
flags |= MV_FAKE_VMEM;
physical_base = MemoryMap_Setup(views, num_views, flags, &g_arena);
flags |= PhysicalMemoryRegion::FAKE_VMEM;
u32 mem_size = 0;
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
region.shm_position = mem_size;
mem_size += region.size;
}
g_arena.GrabSHMSegment(mem_size);
physical_base = MemArena::FindMemoryBase();
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
u8* base = physical_base + region.physical_address;
*region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size, base);
if (!*region.out_pointer)
{
PanicAlert("MemoryMap_Setup: Failed finding a memory base.");
exit(0);
}
}
#ifndef _ARCH_32
logical_base = physical_base + 0x200000000;
#endif
@ -197,6 +222,47 @@ void Init()
m_IsInitialized = true;
}
// Rebuilds the logical (address-translated) host memory mapping from the
// current DBAT table. All previous logical mappings are torn down first;
// then, for every table entry marked valid, the translated physical range
// is intersected with each known physical region and the overlap is mapped
// at the corresponding logical host address. Granularity is one entry per
// (1 << PowerPC::BAT_INDEX_SHIFT) bytes.
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
{
for (auto& entry : logical_mapped_entries)
{
g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
}
logical_mapped_entries.clear();
for (u32 i = 0; i < (1 << (32 - PowerPC::BAT_INDEX_SHIFT)); ++i)
{
// Bit 0 of an entry marks it valid; the low two bits appear to be flag
// bits (masked off below), the rest is the translated physical address.
if (dbat_table[i] & 1)
{
u32 logical_address = i << PowerPC::BAT_INDEX_SHIFT;
// TODO: Merge adjacent mappings to make this faster.
u32 logical_size = 1 << PowerPC::BAT_INDEX_SHIFT;
u32 translated_address = dbat_table[i] & ~3;
for (const auto& physical_region : physical_regions)
{
u32 mapping_address = physical_region.physical_address;
u32 mapping_end = mapping_address + physical_region.size;
u32 intersection_start = std::max(mapping_address, translated_address);
u32 intersection_end = std::min(mapping_end, translated_address + logical_size);
if (intersection_start < intersection_end)
{
// Found an overlapping region; map it.
u32 position = physical_region.shm_position + intersection_start - mapping_address;
u8* base = logical_base + logical_address + intersection_start - translated_address;
u32 mapped_size = intersection_end - intersection_start;
void* mapped_pointer = g_arena.CreateView(position, mapped_size, base);
if (!mapped_pointer)
{
PanicAlert("MemoryMap_Setup: Failed finding a memory base.");
exit(0);
}
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
}
}
}
}
}
void DoState(PointerWrap& p)
{
bool wii = SConfig::GetInstance().bWii;
@ -216,10 +282,21 @@ void Shutdown()
m_IsInitialized = false;
u32 flags = 0;
if (SConfig::GetInstance().bWii)
flags |= MV_WII_ONLY;
flags |= PhysicalMemoryRegion::WII_ONLY;
if (bFakeVMEM)
flags |= MV_FAKE_VMEM;
MemoryMap_Shutdown(views, num_views, flags, &g_arena);
flags |= PhysicalMemoryRegion::FAKE_VMEM;
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
g_arena.ReleaseView(*region.out_pointer, region.size);
*region.out_pointer = 0;
}
for (auto& entry : logical_mapped_entries)
{
g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
}
logical_mapped_entries.clear();
g_arena.ReleaseSHMSegment();
physical_base = nullptr;
logical_base = nullptr;
@ -233,7 +310,9 @@ void Clear()
memset(m_pRAM, 0, RAM_SIZE);
if (m_pL1Cache)
memset(m_pL1Cache, 0, L1_CACHE_SIZE);
if (SConfig::GetInstance().bWii && m_pEXRAM)
if (m_pFakeVMEM)
memset(m_pFakeVMEM, 0, FAKEVMEM_SIZE);
if (m_pEXRAM)
memset(m_pEXRAM, 0, EXRAM_SIZE);
}

View File

@ -9,6 +9,7 @@
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Core/PowerPC/PowerPC.h"
// Enable memory checks in the Debug/DebugFast builds, but NOT in release
#if defined(_DEBUG) || defined(DEBUGFAST)
@ -57,13 +58,6 @@ enum
IO_SIZE = 0x00010000,
EXRAM_SIZE = 0x04000000,
EXRAM_MASK = EXRAM_SIZE - 1,
ADDR_MASK_HW_ACCESS = 0x0c000000,
ADDR_MASK_MEM1 = 0x20000000,
#if _ARCH_32
MEMVIEW32_MASK = 0x3FFFFFFF,
#endif
};
// MMIO mapping object.
@ -75,6 +69,8 @@ void Init();
void Shutdown();
void DoState(PointerWrap& p);
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table);
void Clear();
bool AreMemoryBreakpointsActivated();

View File

@ -962,6 +962,11 @@ IPCCommandResult CWII_IPC_HLE_Device_es::IOCtlV(u32 _CommandAddress)
if (pDolLoader->IsValid())
{
pDolLoader->Load(); // TODO: Check why sysmenu does not load the DOL correctly
// WADs start with address translation off at the given entry point.
//
// The state of other CPU registers (like the BAT registers) doesn't matter much
// because the WAD initializes everything itself anyway.
MSR = 0;
PC = pDolLoader->GetEntryPoint();
bSuccess = true;
}

View File

@ -297,6 +297,15 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
// TODO: emulate locked cache and DMA bits.
break;
case SPR_HID4:
if (oldValue != rSPR(iIndex))
{
WARN_LOG(POWERPC, "HID4 updated %x %x", oldValue, rSPR(iIndex));
PowerPC::IBATUpdated();
PowerPC::DBATUpdated();
}
break;
case SPR_WPAR:
_assert_msg_(POWERPC, rGPR[_inst.RD] == 0x0C008000, "Gather pipe @ %08x", PC);
GPFifo::ResetGatherPipe();
@ -354,6 +363,52 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
case SPR_XER:
SetXER(rSPR(iIndex));
break;
case SPR_DBAT0L:
case SPR_DBAT0U:
case SPR_DBAT1L:
case SPR_DBAT1U:
case SPR_DBAT2L:
case SPR_DBAT2U:
case SPR_DBAT3L:
case SPR_DBAT3U:
case SPR_DBAT4L:
case SPR_DBAT4U:
case SPR_DBAT5L:
case SPR_DBAT5U:
case SPR_DBAT6L:
case SPR_DBAT6U:
case SPR_DBAT7L:
case SPR_DBAT7U:
if (oldValue != rSPR(iIndex))
{
WARN_LOG(POWERPC, "DBAT updated %d %x %x", iIndex, oldValue, rSPR(iIndex));
PowerPC::DBATUpdated();
}
break;
case SPR_IBAT0L:
case SPR_IBAT0U:
case SPR_IBAT1L:
case SPR_IBAT1U:
case SPR_IBAT2L:
case SPR_IBAT2U:
case SPR_IBAT3L:
case SPR_IBAT3U:
case SPR_IBAT4L:
case SPR_IBAT4U:
case SPR_IBAT5L:
case SPR_IBAT5U:
case SPR_IBAT6L:
case SPR_IBAT6U:
case SPR_IBAT7L:
case SPR_IBAT7U:
if (oldValue != rSPR(iIndex))
{
WARN_LOG(POWERPC, "IBAT updated %d %x %x", iIndex, oldValue, rSPR(iIndex));
PowerPC::IBATUpdated();
}
break;
}
}

View File

@ -365,38 +365,40 @@ void Jit64::dcbz(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
MOV(32, R(RSCRATCH), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH), gpr.R(a));
AND(32, R(RSCRATCH), Imm32(~31));
TEST(32, R(RSCRATCH), Imm32(mem_mask));
FixupBranch slow = J_CC(CC_NZ, true);
// Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but
// supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure.
SwitchToFarCode();
SetJumpTarget(slow);
if (UReg_MSR(MSR).DR)
{
// Perform lookup to see if we can use fast path.
MOV(32, R(RSCRATCH2), R(RSCRATCH));
SHR(32, R(RSCRATCH2), Imm8(PowerPC::BAT_INDEX_SHIFT));
TEST(32, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)&PowerPC::dbat_table[0]), Imm32(2));
FixupBranch slow = J_CC(CC_Z, true);
// Fast path: compute full address, then zero out 32 bytes of memory.
PXOR(XMM0, R(XMM0));
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
// Slow path: call the general-case code.
SwitchToFarCode();
SetJumpTarget(slow);
}
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH);
ABI_CallFunctionR(&PowerPC::ClearCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
FixupBranch exit = J(true);
SwitchToNearCode();
// Mask out the address so we don't write to MEM1 out of bounds
// FIXME: Work out why the AGP disc writes out of bounds
if (!SConfig::GetInstance().bWii)
AND(32, R(RSCRATCH), Imm32(Memory::RAM_MASK));
PXOR(XMM0, R(XMM0));
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
SetJumpTarget(exit);
if (UReg_MSR(MSR).DR)
{
FixupBranch end = J(true);
SwitchToNearCode();
SetJumpTarget(end);
}
}
void Jit64::stX(UGeckoInstruction inst)

View File

@ -25,6 +25,9 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && !!(inst.SUBOP6 & 32));
@ -112,6 +115,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && !!(inst.SUBOP6 & 32));

View File

@ -401,7 +401,8 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
}
}
int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG;
int flags =
isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON;
if (!single)
flags |= SAFE_LOADSTORE_NO_SWAP;
@ -459,7 +460,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
if (jit->jo.memcheck)
{
BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE_LOAD;
int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG;
int flags =
isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON;
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags);
if (!single && (type == QUANTIZE_U8 || type == QUANTIZE_S8))
{
@ -582,7 +584,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
if (jit->jo.memcheck)
{
BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE;
int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG;
int flags =
isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON;
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags);
}

View File

@ -23,6 +23,9 @@ void JitArm64::psq_l(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || !jo.fastmem);
// The asm routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
// X30 is LR
// X0 contains the scale
// X1 is the address
@ -103,6 +106,9 @@ void JitArm64::psq_st(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || !jo.fastmem);
// The asm routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
// X30 is LR
// X0 contains the scale
// X1 is the address
@ -179,6 +185,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
UBFM(scale_reg, scale_reg, 8, 13); // Scale
// Inline address check
// FIXME: This doesn't correctly account for the BAT configuration.
TST(addr_reg, 6, 1);
FixupBranch pass = B(CC_EQ);
FixupBranch fail = B();

View File

@ -212,7 +212,7 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
}
FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
BitSet32 registers_in_use, u32 mem_mask)
BitSet32 registers_in_use)
{
registers_in_use[reg_addr] = true;
if (reg_value.IsSimpleReg())
@ -227,29 +227,19 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
else
scratch = reg_addr;
// On Gamecube games with MMU, do a little bit of extra work to make sure we're not accessing the
// 0x81800000 to 0x83FFFFFF range.
// It's okay to take a shortcut and not check this range on non-MMU games, since we're already
// assuming they'll never do an invalid memory access.
// The slightly more complex check needed for Wii games using the space just above MEM1 isn't
// implemented here yet, since there are no known working Wii MMU games to test it with.
if (jit->jo.memcheck && !SConfig::GetInstance().bWii)
{
if (scratch == reg_addr)
PUSH(scratch);
else
MOV(32, R(scratch), R(reg_addr));
AND(32, R(scratch), Imm32(0x3FFFFFFF));
CMP(32, R(scratch), Imm32(0x01800000));
if (scratch == reg_addr)
POP(scratch);
return J_CC(CC_AE, farcode.Enabled());
}
if (scratch == reg_addr)
PUSH(scratch);
else
{
TEST(32, R(reg_addr), Imm32(mem_mask));
return J_CC(CC_NZ, farcode.Enabled());
}
MOV(32, R(scratch), R(reg_addr));
// Perform lookup to see if we can use fast path.
SHR(32, R(scratch), Imm8(PowerPC::BAT_INDEX_SHIFT));
TEST(32, MScaled(scratch, SCALE_4, (u32)(u64)&PowerPC::dbat_table[0]), Imm32(2));
if (scratch == reg_addr)
POP(scratch);
return J_CC(CC_Z, farcode.Enabled());
}
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
@ -305,14 +295,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
}
FixupBranch exit;
if (!slowmem)
bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || UReg_MSR(MSR).DR;
bool fast_check_address = !slowmem && dr_set;
if (fast_check_address)
{
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask);
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse);
UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
if (farcode.Enabled())
SwitchToFarCode();
@ -350,7 +337,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
}
if (!slowmem)
if (fast_check_address)
{
if (farcode.Enabled())
{
@ -575,15 +562,12 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
}
}
FixupBranch slow, exit;
if (!slowmem)
FixupBranch exit;
bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || UReg_MSR(MSR).DR;
bool fast_check_address = !slowmem && dr_set;
if (fast_check_address)
{
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
if (farcode.Enabled())
SwitchToFarCode();
@ -629,7 +613,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
MemoryExceptionCheck();
if (!slowmem)
if (fast_check_address)
{
if (farcode.Enabled())
{

View File

@ -123,7 +123,7 @@ public:
}
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
BitSet32 registers_in_use, u32 mem_mask);
BitSet32 registers_in_use);
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
s32 offset = 0, bool signExtend = false);
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
@ -154,6 +154,7 @@ public:
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
// Force slowmem (used when generating fallbacks in trampolines)
SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
SAFE_LOADSTORE_DR_ON = 32,
};
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,

View File

@ -12,6 +12,9 @@ void JitILBase::psq_st(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || inst.W);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12);
IREmitter::InstLoc val;
@ -32,6 +35,9 @@ void JitILBase::psq_l(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || inst.W);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12);
IREmitter::InstLoc val;

View File

@ -12,6 +12,7 @@
#include "Core/HW/GPFifo.h"
#include "Core/HW/MMIO.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "VideoCommon/VideoBackendBase.h"
@ -89,8 +90,20 @@ static bool IsNoExceptionFlag(XCheckTLBFlag flag)
return flag == FLAG_NO_EXCEPTION || flag == FLAG_OPCODE_NO_EXCEPTION;
}
// Outcome of an effective-to-physical address translation attempt.
struct TranslateAddressResult
{
enum
{
BAT_TRANSLATED,
PAGE_TABLE_TRANSLATED,
DIRECT_STORE_SEGMENT,
PAGE_FAULT
} result;
// Translated physical address; meaningful only when Success() is true.
u32 address;
// Relies on the enumerator order above: everything up to and including
// PAGE_TABLE_TRANSLATED counts as a successful translation.
bool Success() const { return result <= PAGE_TABLE_TRANSLATED; }
};
template <const XCheckTLBFlag flag>
static u32 TranslateAddress(const u32 address);
static TranslateAddressResult TranslateAddress(const u32 address);
// Nasty but necessary. Super Mario Galaxy pointer relies on this stuff.
static u32 EFB_Read(const u32 addr)
@ -142,282 +155,204 @@ static void EFB_Write(u32 data, u32 addr)
}
}
BatTable ibat_table;
BatTable dbat_table;
static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite);
template <XCheckTLBFlag flag, typename T>
__forceinline static T ReadFromHardware(const u32 em_address)
template <XCheckTLBFlag flag, typename T, bool never_translate = false>
__forceinline static T ReadFromHardware(u32 em_address)
{
int segment = em_address >> 28;
bool performTranslation = UReg_MSR(MSR).DR;
// Quick check for an address that can't meet any of the following conditions,
// to speed up the MMU path.
if (!BitSet32(0xCFC)[segment] && performTranslation)
if (!never_translate && UReg_MSR(MSR).DR)
{
// TODO: Figure out the fastest order of tests for both read and write (they are probably
// different).
if (flag == FLAG_READ && (em_address & 0xF8000000) == 0xC8000000)
{
if (em_address < 0xcc000000)
return EFB_Read(em_address);
else
return (T)Memory::mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address &
0x0FFFFFFF);
}
if (segment == 0x0 || segment == 0x8 || segment == 0xC)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
return bswap((*(const T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK]));
}
if (Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
// Handle EXRAM.
// TODO: Is this supposed to be mirrored like main RAM?
return bswap((*(const T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF]));
}
if (segment == 0xE && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
return bswap((*(const T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF]));
}
}
if (Memory::bFakeVMEM && performTranslation && (segment == 0x7 || segment == 0x4))
{
// fake VMEM
return bswap((*(const T*)&Memory::m_pFakeVMEM[em_address & Memory::FAKEVMEM_MASK]));
}
if (!performTranslation)
{
if (flag == FLAG_READ && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
return EFB_Read(em_address);
else
return (T)Memory::mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
}
if (segment == 0x0)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
return bswap((*(const T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK]));
}
if (Memory::m_pEXRAM && segment == 0x1 && (em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
return bswap((*(const T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF]));
}
PanicAlert("Unable to resolve read address %x PC %x", em_address, PC);
return 0;
}
// MMU: Do page table translation
u32 tlb_addr = TranslateAddress<flag>(em_address);
if (tlb_addr == 0)
{
if (flag == FLAG_READ)
GenerateDSIException(em_address, false);
return 0;
}
// Handle loads that cross page boundaries (ewwww)
// The alignment check isn't strictly necessary, but since this is a rare slow path, it provides a
// faster
// (1 instruction on x86) bailout.
if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) &&
(em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
u32 tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (tlb_addr == 0 || tlb_addr_next_page == 0)
auto translated_addr = TranslateAddress<flag>(em_address);
if (!translated_addr.Success())
{
if (flag == FLAG_READ)
GenerateDSIException(em_address_next_page, false);
GenerateDSIException(em_address, false);
return 0;
}
T var = 0;
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++)
if ((em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
if (addr == em_address_next_page)
tlb_addr = tlb_addr_next_page;
var = (var << 8) | Memory::physical_base[tlb_addr];
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
auto addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (!addr_next_page.Success())
{
if (flag == FLAG_READ)
GenerateDSIException(em_address_next_page, false);
return 0;
}
T var = 0;
u32 addr_translated = translated_addr.address;
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, addr_translated++)
{
if (addr == em_address_next_page)
addr_translated = addr_next_page.address;
var = (var << 8) | ReadFromHardware<flag, u8, true>(addr_translated);
}
return var;
}
return var;
em_address = translated_addr.address;
}
// The easy case!
return bswap(*(const T*)&Memory::physical_base[tlb_addr]);
// TODO: Make sure these are safe for unaligned addresses.
// Locked L1 technically doesn't have a fixed address, but games all use 0xE0000000.
if ((em_address >> 28) == 0xE && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
return bswap((*(const T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF]));
}
// In Fake-VMEM mode, we need to map the memory somewhere into
// physical memory for BAT translation to work; we currently use
// [0x7E000000, 0x80000000).
if (Memory::bFakeVMEM && ((em_address & 0xFE000000) == 0x7E000000))
{
return bswap(*(T*)&Memory::m_pFakeVMEM[em_address & Memory::RAM_MASK]);
}
if (flag == FLAG_READ && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
return EFB_Read(em_address);
else
return (T)Memory::mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
}
if ((em_address & 0xF8000000) == 0x00000000)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
return bswap((*(const T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK]));
}
if (Memory::m_pEXRAM && (em_address >> 28) == 0x1 &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
return bswap((*(const T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF]));
}
PanicAlert("Unable to resolve read address %x PC %x", em_address, PC);
return 0;
}
template <XCheckTLBFlag flag, typename T>
template <XCheckTLBFlag flag, typename T, bool never_translate = false>
__forceinline static void WriteToHardware(u32 em_address, const T data)
{
int segment = em_address >> 28;
// Quick check for an address that can't meet any of the following conditions,
// to speed up the MMU path.
bool performTranslation = UReg_MSR(MSR).DR;
if (!BitSet32(0xCFC)[segment] && performTranslation)
if (!never_translate && UReg_MSR(MSR).DR)
{
// First, let's check for FIFO writes, since they are probably the most common
// reason we end up in this function.
// Note that we must mask the address to correctly emulate certain games;
// Pac-Man World 3 in particular is affected by this.
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0xCC008000)
{
switch (sizeof(T))
{
case 1:
GPFifo::Write8((u8)data);
return;
case 2:
GPFifo::Write16((u16)data);
return;
case 4:
GPFifo::Write32((u32)data);
return;
case 8:
GPFifo::Write64((u64)data);
return;
}
}
if (flag == FLAG_WRITE && (em_address & 0xF8000000) == 0xC8000000)
{
if (em_address < 0xcc000000)
{
// TODO: This only works correctly for 32-bit writes.
EFB_Write((u32)data, em_address);
return;
}
else
{
Memory::mmio_mapping->Write(em_address & 0x0FFFFFFF, data);
return;
}
}
if (segment == 0x0 || segment == 0x8 || segment == 0xC)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
*(T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
if (Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
// Handle EXRAM.
// TODO: Is this supposed to be mirrored like main RAM?
*(T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
if (segment == 0xE && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
*(T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
}
if (Memory::bFakeVMEM && performTranslation && (segment == 0x7 || segment == 0x4))
{
// fake VMEM
*(T*)&Memory::m_pFakeVMEM[em_address & Memory::FAKEVMEM_MASK] = bswap(data);
return;
}
if (!performTranslation)
{
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0x0C008000)
{
switch (sizeof(T))
{
case 1:
GPFifo::Write8((u8)data);
return;
case 2:
GPFifo::Write16((u16)data);
return;
case 4:
GPFifo::Write32((u32)data);
return;
case 8:
GPFifo::Write64((u64)data);
return;
}
}
if (flag == FLAG_WRITE && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
{
// TODO: This only works correctly for 32-bit writes.
EFB_Write((u32)data, em_address);
return;
}
else
{
Memory::mmio_mapping->Write(em_address, data);
return;
}
}
if (segment == 0x0)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
*(T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
if (Memory::m_pEXRAM && segment == 0x1 && (em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
*(T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
PanicAlert("Unable to resolve write address %x PC %x", em_address, PC);
return;
}
// MMU: Do page table translation
u32 tlb_addr = TranslateAddress<flag>(em_address);
if (tlb_addr == 0)
{
if (flag == FLAG_WRITE)
GenerateDSIException(em_address, true);
return;
}
// Handle stores that cross page boundaries (ewwww)
if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) &&
(em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
T val = bswap(data);
// We need to check both addresses before writing in case there's a DSI.
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
u32 tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (tlb_addr_next_page == 0)
auto translated_addr = TranslateAddress<flag>(em_address);
if (!translated_addr.Success())
{
if (flag == FLAG_WRITE)
GenerateDSIException(em_address_next_page, true);
GenerateDSIException(em_address, true);
return;
}
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++, val >>= 8)
if ((em_address & (sizeof(T) - 1)) &&
(em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
if (addr == em_address_next_page)
tlb_addr = tlb_addr_next_page;
Memory::physical_base[tlb_addr] = (u8)val;
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
auto addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (!addr_next_page.Success())
{
if (flag == FLAG_WRITE)
GenerateDSIException(em_address_next_page, true);
return;
}
T val = bswap(data);
u32 addr_translated = translated_addr.address;
for (u32 addr = em_address; addr < em_address + sizeof(T);
addr++, addr_translated++, val >>= 8)
{
if (addr == em_address_next_page)
addr_translated = addr_next_page.address;
WriteToHardware<flag, u8, true>(addr_translated, (u8)val);
}
return;
}
em_address = translated_addr.address;
}
// TODO: Make sure these are safe for unaligned addresses.
// Locked L1 technically doesn't have a fixed address, but games all use 0xE0000000.
if ((em_address >> 28 == 0xE) && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
*(T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
// The easy case!
*(T*)&Memory::physical_base[tlb_addr] = bswap(data);
// In Fake-VMEM mode, we need to map the memory somewhere into
// physical memory for BAT translation to work; we currently use
// [0x7E000000, 0x80000000).
if (Memory::bFakeVMEM && ((em_address & 0xFE000000) == 0x7E000000))
{
*(T*)&Memory::m_pFakeVMEM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
// Check for a gather pipe write.
// Note that we must mask the address to correctly emulate certain games;
// Pac-Man World 3 in particular is affected by this.
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0x0C008000)
{
switch (sizeof(T))
{
case 1:
GPFifo::Write8((u8)data);
return;
case 2:
GPFifo::Write16((u16)data);
return;
case 4:
GPFifo::Write32((u32)data);
return;
case 8:
GPFifo::Write64((u64)data);
return;
}
}
if (flag == FLAG_WRITE && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
{
EFB_Write((u32)data, em_address);
return;
}
else
{
Memory::mmio_mapping->Write(em_address, data);
return;
}
}
if ((em_address & 0xF8000000) == 0x00000000)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
*(T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
if (Memory::m_pEXRAM && (em_address >> 28) == 0x1 &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
*(T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
PanicAlert("Unable to resolve write address %x PC %x", em_address, PC);
return;
}
// =====================
@ -444,49 +379,28 @@ TryReadInstResult TryReadInstruction(u32 address)
bool from_bat = true;
if (UReg_MSR(MSR).IR)
{
// TODO: Use real translation.
if (SConfig::GetInstance().bMMU && (address & Memory::ADDR_MASK_MEM1))
auto tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (!tlb_addr.Success())
{
u32 tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (tlb_addr == 0)
{
return TryReadInstResult{false, false, 0};
}
else
{
address = tlb_addr;
from_bat = false;
}
return TryReadInstResult{false, false, 0};
}
else
{
int segment = address >> 28;
if ((segment == 0x8 || segment == 0x0) && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
{
address = address & 0x3FFFFFFF;
}
else if (segment == 0x9 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
address = address & 0x3FFFFFFF;
}
else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4))
{
u32 hex = bswap((*(const u32*)&Memory::m_pFakeVMEM[address & Memory::FAKEVMEM_MASK]));
return TryReadInstResult{true, true, hex};
}
else
{
return TryReadInstResult{false, false, 0};
}
address = tlb_addr.address;
from_bat = tlb_addr.result == TranslateAddressResult::BAT_TRANSLATED;
}
}
u32 hex;
// TODO: Refactor this. This icache implementation is totally wrong if used with the fake vmem.
if (Memory::bFakeVMEM && ((address & 0xFE000000) == 0x7E000000))
{
hex = bswap(*(const u32*)&Memory::m_pFakeVMEM[address & Memory::FAKEVMEM_MASK]);
}
else
{
if (address & 0xC0000000)
ERROR_LOG(MEMMAP, "Strange program counter with address translation off: 0x%08x", address);
hex = PowerPC::ppcState.iCache.ReadInstruction(address);
}
u32 hex = PowerPC::ppcState.iCache.ReadInstruction(address);
return TryReadInstResult{true, from_bat, hex};
}
@ -697,43 +611,35 @@ bool IsOptimizableRAMAddress(const u32 address)
if (!UReg_MSR(MSR).DR)
return false;
int segment = address >> 28;
return (((segment == 0x8 || segment == 0xC || segment == 0x0) &&
(address & 0x0FFFFFFF) < Memory::REALRAM_SIZE) ||
(Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(address & 0x0FFFFFFF) < Memory::EXRAM_SIZE) ||
(segment == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE))));
// TODO: This API needs to take an access size
//
// We store whether an access can be optimized to an unchecked access
// in dbat_table.
u32 bat_result = dbat_table[address >> BAT_INDEX_SHIFT];
return (bat_result & 2) != 0;
}
bool HostIsRAMAddress(u32 address)
{
// TODO: This needs to be rewritten; it makes incorrect assumptions
// about BATs and page tables.
bool performTranslation = UReg_MSR(MSR).DR;
int segment = address >> 28;
if (performTranslation)
{
if ((segment == 0x8 || segment == 0xC || segment == 0x0) &&
(address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
return true;
else if (Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
return true;
else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4))
return true;
else if (segment == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
return true;
address = TranslateAddress<FLAG_NO_EXCEPTION>(address);
if (!address)
auto translate_address = TranslateAddress<FLAG_NO_EXCEPTION>(address);
if (!translate_address.Success())
return false;
address = translate_address.address;
segment = address >> 28;
}
if (segment == 0x0 && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
return true;
else if (Memory::m_pEXRAM && segment == 0x1 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
return true;
else if (Memory::bFakeVMEM && ((address & 0xFE000000) == 0x7E000000))
return true;
else if (segment == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
return true;
return false;
}
@ -809,12 +715,32 @@ void DMA_MemoryToLC(const u32 cacheAddr, const u32 memAddr, const u32 numBlocks)
memcpy(dst, src, 32 * numBlocks);
}
void ClearCacheLine(const u32 address)
void ClearCacheLine(u32 address)
{
// FIXME: does this do the right thing if dcbz is run on hardware memory, e.g.
// the FIFO? Do games even do that? Probably not, but we should try to be correct...
_dbg_assert_(POWERPC, (address & 0x1F) == 0);
if (UReg_MSR(MSR).DR)
{
auto translated_address = TranslateAddress<FLAG_WRITE>(address);
if (translated_address.result == TranslateAddressResult::DIRECT_STORE_SEGMENT)
{
// dcbz to direct store segments is ignored. This is a little
// unintuitive, but this is consistent with both console and the PEM.
// Advance Game Port crashes if we don't emulate this correctly.
return;
}
if (translated_address.result == TranslateAddressResult::PAGE_FAULT)
{
// If translation fails, generate a DSI.
GenerateDSIException(address, true);
return;
}
address = translated_address.address;
}
// TODO: This isn't precisely correct for non-RAM regions, but the difference
// is unlikely to matter.
for (u32 i = 0; i < 32; i += 8)
Write_U64(0, address + i);
WriteToHardware<FLAG_WRITE, u64, true>(address + i, 0);
}
u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize)
@ -826,26 +752,37 @@ u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize)
if (!UReg_MSR(MSR).DR)
return 0;
if ((address & 0xF0000000) != 0xC0000000)
// Translate address
// If we also optimize for TLB mappings, we'd have to clear the
// JitCache on each TLB invalidation.
if (!TranslateBatAddess(dbat_table, &address))
return 0;
unsigned translated = address & 0x0FFFFFFF;
bool aligned = (translated & ((accessSize >> 3) - 1)) == 0;
if (!aligned || !MMIO::IsMMIOAddress(translated))
// Check whether the address is an aligned address of an MMIO register.
bool aligned = (address & ((accessSize >> 3) - 1)) == 0;
if (!aligned || !MMIO::IsMMIOAddress(address))
return 0;
return translated;
return address;
}
bool IsOptimizableGatherPipeWrite(u32 address)
{
#ifdef ENABLE_MEM_CHECK
return false;
return 0;
#endif
if (!UReg_MSR(MSR).DR)
return false;
return 0;
return address == 0xCC008000;
// Translate address, only check BAT mapping.
// If we also optimize for TLB mappings, we'd have to clear the
// JitCache on each TLB invalidation.
if (!TranslateBatAddess(dbat_table, &address))
return 0;
// Check whether the translated address equals the address in WPAR.
return address == 0x0C008000;
}
TranslateResult JitCache_TranslateAddress(u32 address)
@ -853,36 +790,15 @@ TranslateResult JitCache_TranslateAddress(u32 address)
if (!UReg_MSR(MSR).IR)
return TranslateResult{true, true, address};
bool from_bat = true;
int segment = address >> 28;
if (SConfig::GetInstance().bMMU && (address & Memory::ADDR_MASK_MEM1))
// TODO: We shouldn't use FLAG_OPCODE if the caller is the debugger.
auto tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (!tlb_addr.Success())
{
u32 tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (tlb_addr == 0)
{
return TranslateResult{false, false, 0};
}
else
{
address = tlb_addr;
from_bat = false;
}
}
else
{
if ((segment == 0x8 || segment == 0x0) && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
address = address & 0x3FFFFFFF;
else if (Memory::m_pEXRAM && segment == 0x9 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
address = address & 0x3FFFFFFF;
else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4))
address = 0x7E000000 | (address & Memory::FAKEVMEM_MASK);
else
return TranslateResult{false, false, 0};
return TranslateResult{false, false, 0};
}
return TranslateResult{true, from_bat, address};
bool from_bat = tlb_addr.result == TranslateAddressResult::BAT_TRANSLATED;
return TranslateResult{true, from_bat, tlb_addr.address};
}
// *********************************************************************************
@ -1115,7 +1031,8 @@ void InvalidateTLBEntry(u32 address)
}
// Page Address Translation
static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLBFlag flag)
static __forceinline TranslateAddressResult TranslatePageAddress(const u32 address,
const XCheckTLBFlag flag)
{
// TLB cache
// This catches 99%+ of lookups in practice, so the actual page table entry code below doesn't
@ -1124,10 +1041,19 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
u32 translatedAddress = 0;
TLBLookupResult res = LookupTLBPageAddress(flag, address, &translatedAddress);
if (res == TLB_FOUND)
return translatedAddress;
return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED, translatedAddress};
u32 sr = PowerPC::ppcState.sr[EA_SR(address)];
if (sr & 0x80000000)
return TranslateAddressResult{TranslateAddressResult::DIRECT_STORE_SEGMENT, 0};
// TODO: Handle KS/KP segment register flags.
// No-execute segment register flag.
if ((flag == FLAG_OPCODE || flag == FLAG_OPCODE_NO_EXCEPTION) && (sr & 0x10000000))
return TranslateAddressResult{TranslateAddressResult::PAGE_FAULT, 0};
u32 offset = EA_Offset(address); // 12 bit
u32 page_index = EA_PageIndex(address); // 16 bit
u32 VSID = SR_VSID(sr); // 24 bit
@ -1181,19 +1107,141 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
if (res != TLB_UPDATE_C)
UpdateTLBEntry(flag, PTE2, address);
return (PTE2.RPN << 12) | offset;
return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED,
(PTE2.RPN << 12) | offset};
}
}
}
return 0;
return TranslateAddressResult{TranslateAddressResult::PAGE_FAULT, 0};
}
static void UpdateBATs(BatTable& bat_table, u32 base_spr)
{
// TODO: Separate BATs for MSR.PR==0 and MSR.PR==1
// TODO: Handle PP/WIMG settings.
// TODO: Check how hardware reacts to overlapping BATs (including
// BATs which should cause a DSI).
// TODO: Check how hardware reacts to invalid BATs (bad mask etc).
for (int i = 0; i < 4; ++i)
{
u32 spr = base_spr + i * 2;
UReg_BAT_Up batu = PowerPC::ppcState.spr[spr];
UReg_BAT_Lo batl = PowerPC::ppcState.spr[spr + 1];
if (batu.VS == 0 && batu.VP == 0)
continue;
if ((batu.BEPI & batu.BL) != 0)
{
// With a valid BAT, the simplest way to match is
// (input & ~BL_mask) == BEPI. For now, assume it's
// implemented this way for invalid BATs as well.
WARN_LOG(POWERPC, "Bad BAT setup: BEPI overlaps BL");
continue;
}
if ((batl.BRPN & batu.BL) != 0)
{
// With a valid BAT, the simplest way to translate is
// (input & BL_mask) | BRPN_address. For now, assume it's
// implemented this way for invalid BATs as well.
WARN_LOG(POWERPC, "Bad BAT setup: BPRN overlaps BL");
}
if (CountSetBits((u32)(batu.BL + 1)) != 1)
{
// With a valid BAT, the simplest way of masking is
// (input & ~BL_mask) for matching and (input & BL_mask) for
// translation. For now, assume it's implemented this way for
// invalid BATs as well.
WARN_LOG(POWERPC, "Bad BAT setup: invalid mask in BL");
}
for (u32 j = 0; j <= batu.BL; ++j)
{
// Enumerate all bit-patterns which fit within the given mask.
if ((j & batu.BL) == j)
{
// This bit is a little weird: if BRPN & j != 0, we end up with
// a strange mapping. Need to check on hardware.
u32 address = (batl.BRPN | j) << BAT_INDEX_SHIFT;
// The bottom bit is whether the translation is valid; the second
// bit from the bottom is whether we can use the fastmem arena.
u32 valid_bit = 0x1;
if (Memory::bFakeVMEM && ((address & 0xFE000000) == 0x7E000000))
valid_bit = 0x3;
else if (address < Memory::REALRAM_SIZE)
valid_bit = 0x3;
else if (Memory::m_pEXRAM && (address >> 28) == 0x1 &&
(address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
valid_bit = 0x3;
else if ((address >> 28) == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
valid_bit = 0x3;
// (BEPI | j) == (BEPI & ~BL) | (j & BL).
bat_table[batu.BEPI | j] = address | valid_bit;
}
}
}
}
static void UpdateFakeMMUBat(BatTable& bat_table, u32 start_addr)
{
for (u32 i = 0; i < (0x10000000 >> BAT_INDEX_SHIFT); ++i)
{
// Map from 0x4XXXXXXX or 0x7XXXXXXX to the range
// [0x7E000000,0x80000000).
u32 e_address = i + (start_addr >> BAT_INDEX_SHIFT);
u32 p_address = 0x7E000003 | ((i << BAT_INDEX_SHIFT) & Memory::FAKEVMEM_MASK);
bat_table[e_address] = p_address;
}
}
void DBATUpdated()
{
dbat_table = {};
UpdateBATs(dbat_table, SPR_DBAT0U);
bool extended_bats = SConfig::GetInstance().bWii && HID4.SBE;
if (extended_bats)
UpdateBATs(dbat_table, SPR_DBAT4U);
if (Memory::bFakeVMEM)
{
// In Fake-MMU mode, insert some extra entries into the BAT tables.
UpdateFakeMMUBat(dbat_table, 0x40000000);
UpdateFakeMMUBat(dbat_table, 0x70000000);
}
Memory::UpdateLogicalMemory(dbat_table);
// IsOptimizable*Address and dcbz depends on the BAT mapping, so we need a flush here.
JitInterface::ClearSafe();
}
void IBATUpdated()
{
ibat_table = {};
UpdateBATs(ibat_table, SPR_IBAT0U);
bool extended_bats = SConfig::GetInstance().bWii && HID4.SBE;
if (extended_bats)
UpdateBATs(ibat_table, SPR_IBAT4U);
if (Memory::bFakeVMEM)
{
// In Fake-MMU mode, insert some extra entries into the BAT tables.
UpdateFakeMMUBat(ibat_table, 0x40000000);
UpdateFakeMMUBat(ibat_table, 0x70000000);
}
JitInterface::ClearSafe();
}
// Translate effective address using BAT or PAT. Returns 0 if the address cannot be translated.
// Through the hardware looks up BAT and TLB in parallel, BAT is used first if available.
// So we first check if there is a matching BAT entry, else we look for the TLB in
// TranslatePageAddress().
template <const XCheckTLBFlag flag>
__forceinline u32 TranslateAddress(const u32 address)
TranslateAddressResult TranslateAddress(const u32 address)
{
// TODO: Perform BAT translation. (At the moment, we hardcode an assumed BAT
// configuration, so there's no reason to actually check the registers.)
u32 bat_result = (flag == FLAG_OPCODE ? ibat_table : dbat_table)[address >> BAT_INDEX_SHIFT];
if (bat_result & 1)
{
u32 result_addr = (bat_result & ~3) | (address & 0x0001FFFF);
return TranslateAddressResult{TranslateAddressResult::BAT_TRANSLATED, result_addr};
}
return TranslatePageAddress(address, flag);
}

View File

@ -84,9 +84,6 @@ void PPCSymbolDB::AddKnownSymbol(u32 startAddr, u32 size, const std::string& nam
Symbol* PPCSymbolDB::GetSymbolFromAddr(u32 addr)
{
if (!PowerPC::HostIsRAMAddress(addr))
return nullptr;
XFuncMap::iterator it = functions.find(addr);
if (it != functions.end())
{

View File

@ -4,6 +4,7 @@
#pragma once
#include <array>
#include <cstddef>
#include <tuple>
@ -265,6 +266,8 @@ void ClearCacheLine(const u32 address); // Zeroes 32 bytes; address should be 3
// TLB functions
void SDRUpdated();
void InvalidateTLBEntry(u32 address);
void DBATUpdated();
void IBATUpdated();
// Result changes based on the BAT registers and MSR.DR. Returns whether
// it's safe to optimize a read or write to this address to an unguarded
@ -280,6 +283,19 @@ struct TranslateResult
u32 address;
};
TranslateResult JitCache_TranslateAddress(u32 address);
static const int BAT_INDEX_SHIFT = 17;
using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>; // 128 KB
extern BatTable ibat_table;
extern BatTable dbat_table;
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address)
{
u32 bat_result = bat_table[*address >> BAT_INDEX_SHIFT];
if ((bat_result & 1) == 0)
return false;
*address = (bat_result & ~3) | (*address & 0x0001FFFF);
return true;
}
} // namespace
enum CRBits