Merge pull request #1723 from Sonicadvance1/AArch64_loadstores

[AArch64] Implements loadstores with AArch64
Ryan Houdek 2014-12-21 20:46:23 -06:00
commit 4fad742da4
11 changed files with 1028 additions and 61 deletions


@@ -164,7 +164,7 @@ if(NOT ENABLE_GENERIC)
set(_M_ARM 1)
set(_M_ARM_64 1)
add_definitions(-D_M_ARM=1 -D_M_ARM_64=1)
set(ENABLE_GENERIC 1)
add_definitions(-march=armv8-a+crc)
else()
set(ENABLE_GENERIC 1)
endif()


@@ -223,6 +223,7 @@ elseif(_M_ARM_64)
PowerPC/JitArm64/JitAsm.cpp
PowerPC/JitArm64/JitArm64Cache.cpp
PowerPC/JitArm64/JitArm64_RegCache.cpp
PowerPC/JitArm64/JitArm64_BackPatch.cpp
PowerPC/JitArm64/JitArm64_Branch.cpp
PowerPC/JitArm64/JitArm64_Integer.cpp
PowerPC/JitArm64/JitArm64_LoadStore.cpp


@@ -24,6 +24,7 @@
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0]))
@@ -48,26 +49,8 @@ private:
ArmFPRCache fpr;
PPCAnalyst::CodeBuffer code_buffer;
struct BackPatchInfo
{
enum
{
FLAG_STORE = (1 << 0),
FLAG_LOAD = (1 << 1),
FLAG_SIZE_8 = (1 << 2),
FLAG_SIZE_16 = (1 << 3),
FLAG_SIZE_32 = (1 << 4),
FLAG_SIZE_F32 = (1 << 5),
FLAG_SIZE_F64 = (1 << 6),
FLAG_REVERSE = (1 << 7),
FLAG_EXTEND = (1 << 8),
};
u32 m_fastmem_size;
u32 m_fastmem_trouble_inst_offset;
u32 m_slowmem_size;
};
// The key is the flags
// The key is the backpatch flags
std::map<u32, BackPatchInfo> m_backpatch_info;
void DoDownCount();


@@ -12,8 +12,6 @@
using namespace Arm64Gen;
static int CODE_SIZE = 1024*1024*32;
void JitArm64::Init()
{
AllocCodeSpace(CODE_SIZE);
@@ -27,6 +25,7 @@ void JitArm64::Init()
code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;
InitBackpatch();
}
void JitArm64::ClearCache()
@@ -151,6 +150,25 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest)
BR(EncodeRegTo64(dest));
}
void JitArm64::WriteExceptionExit()
{
DoDownCount();
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
MOVI2R(XA, (u64)&PowerPC::CheckExceptions);
BLR(XA);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
MOVI2R(XA, (u64)asm_routines.dispatcher);
BR(XA);
gpr.Unlock(WA);
}
void JitArm64::WriteExitDestInR(ARM64Reg Reg)
{
STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc));
@@ -261,6 +279,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}
if (!ops[i].skip)
{
if (js.memcheck && (opinfo->flags & FL_USE_FPU))


@@ -4,6 +4,8 @@
#pragma once
#include <map>
#include "Common/Arm64Emitter.h"
#include "Core/PowerPC/CPUCoreBase.h"
@@ -11,6 +13,7 @@
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitArm64Cache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#define PPCSTATE_OFF(elem) ((s64)&PowerPC::ppcState.elem - (s64)&PowerPC::ppcState)
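
As a sketch of how this offset macro is used (mirroring the WriteExceptionExit code above; X29 is assumed to hold the ppcState pointer throughout the JIT):

ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));  // WA = ppcState.pc
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); // ppcState.npc = WA
gpr.Unlock(WA);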
@@ -31,11 +34,9 @@ public:
JitBaseBlockCache *GetBlockCache() { return &blocks; }
const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) { return nullptr; }
bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); }
bool HandleFault(uintptr_t access_address, SContext* ctx) override { return false; }
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
void ClearCache();
@@ -96,6 +97,8 @@ public:
// LoadStore
void icbi(UGeckoInstruction inst);
void lXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst);
private:
Arm64GPRCache gpr;
@@ -106,14 +109,26 @@ private:
PPCAnalyst::CodeBuffer code_buffer;
// The key is the backpatch flags
std::map<u32, BackPatchInfo> m_backpatch_info;
// Backpatching routines
bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg);
void InitBackpatch();
u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr);
// Loadstore routines
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b);
void DoDownCount();
// Exits
void WriteExit(u32 destination);
void WriteExceptionExit(ARM64Reg dest);
void WriteExitDestInR(ARM64Reg dest);
void WriteExceptionExit(Arm64Gen::ARM64Reg dest);
void WriteExceptionExit();
void WriteExitDestInR(Arm64Gen::ARM64Reg dest);
FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);


@@ -0,0 +1,453 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <string>
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
using namespace Arm64Gen;
static void DoBacktrace(uintptr_t access_address, SContext* ctx)
{
for (int i = 0; i < 30; i += 2)
ERROR_LOG(DYNA_REC, "R%d: 0x%016llx\tR%d: 0x%016llx", i, ctx->CTX_REG(i), i + 1, ctx->CTX_REG(i + 1));
ERROR_LOG(DYNA_REC, "R30: 0x%016llx\tSP: 0x%016llx", ctx->CTX_REG(30), ctx->CTX_SP);
ERROR_LOG(DYNA_REC, "Access Address: 0x%016lx", access_address);
ERROR_LOG(DYNA_REC, "PC: 0x%016llx", ctx->CTX_PC);
ERROR_LOG(DYNA_REC, "Memory Around PC");
std::string pc_memory = "";
for (u64 pc = (ctx->CTX_PC - 32); pc < (ctx->CTX_PC + 32); pc += 16)
{
pc_memory += StringFromFormat("%08x%08x%08x%08x",
*(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12));
ERROR_LOG(DYNA_REC, "0x%016lx: %08x %08x %08x %08x",
pc, *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12));
}
ERROR_LOG(DYNA_REC, "Full block: %s", pc_memory.c_str());
}
bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
{
u32 inst = *(u32*)ptr;
u32 prev_inst = *(u32*)(ptr - 4);
u32 next_inst = *(u32*)(ptr + 4);
u8 op = (inst >> 22) & 0xFF;
u8 size = (inst >> 30) & 0x3;
if (size == 0) // 8-bit
*flags |= BackPatchInfo::FLAG_SIZE_8;
else if (size == 1) // 16-bit
*flags |= BackPatchInfo::FLAG_SIZE_16;
else // 32-bit
*flags |= BackPatchInfo::FLAG_SIZE_32;
if (op == 0xE5) // Load
{
*flags |= BackPatchInfo::FLAG_LOAD;
*reg = (ARM64Reg)(inst & 0x1F);
if ((next_inst & 0x7FFFF000) != 0x5AC00000) // REV
*flags |= BackPatchInfo::FLAG_REVERSE;
if ((next_inst & 0x7F800000) == 0x13000000) // SXTH
*flags |= BackPatchInfo::FLAG_EXTEND;
return true;
}
else if (op == 0xE4) // Store
{
*flags |= BackPatchInfo::FLAG_STORE;
if (size == 0) // 8-bit
*reg = (ARM64Reg)(inst & 0x1F);
else // 16-bit/32-bit register is in previous REV instruction
*reg = (ARM64Reg)((prev_inst >> 5) & 0x1F);
return true;
}
return false;
}
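
A worked decode, as a sketch (the field constants follow the AArch64 load/store unsigned-immediate encoding; the instruction word below is illustrative):

u32 inst = 0xB9400041;        // LDR W1, [X2] - a 32-bit fastmem load
u8 size = (inst >> 30) & 0x3; // 0b10 -> FLAG_SIZE_32
u8 op = (inst >> 22) & 0xFF;  // 0xE5 -> FLAG_LOAD
ARM64Reg reg = (ARM64Reg)(inst & 0x1F); // 1 -> the value lands in W1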
u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARM64Reg RS, ARM64Reg addr)
{
u32 trouble_offset = 0;
const u8* code_base = emit->GetCodePtr();
if (fastmem)
{
emit->MOVK(addr, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32); // splice Memory::base's upper bits into the guest address
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
ARM64Reg temp = W0;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV32(temp, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(temp, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->STR(INDEX_UNSIGNED, temp, addr, 0);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->STRH(INDEX_UNSIGNED, temp, addr, 0);
else
{
emit->STRB(INDEX_UNSIGNED, RS, addr, 0);
emit->HINT(HINT_NOP);
}
}
else
{
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->LDR(INDEX_UNSIGNED, RS, addr, 0);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->LDRH(INDEX_UNSIGNED, RS, addr, 0);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->LDRB(INDEX_UNSIGNED, RS, addr, 0);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV32(RS, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, RS);
}
if (flags & BackPatchInfo::FLAG_EXTEND)
emit->SXTH(RS, RS);
}
}
else
{
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
emit->MOV(W0, RS);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(X30, (u64)&Memory::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(X30, (u64)&Memory::Write_U16);
else
emit->MOVI2R(X30, (u64)&Memory::Write_U8);
emit->BLR(X30);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(X30, (u64)&Memory::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(X30, (u64)&Memory::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->MOVI2R(X30, (u64)&Memory::Read_U8);
emit->BLR(X30);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
emit->MOV(RS, W0);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV32(RS, W0);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, W0);
}
if (flags & BackPatchInfo::FLAG_EXTEND)
emit->SXTH(RS, RS);
}
}
if (do_padding)
{
BackPatchInfo& info = m_backpatch_info[flags];
u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
u32 code_size = emit->GetCodePtr() - code_base;
code_size /= 4;
for (u32 i = 0; i < (num_insts_max - code_size); ++i)
emit->HINT(HINT_NOP);
}
return trouble_offset;
}
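
The do_padding step is what makes patching safe: both variants of every routine are measured once in InitBackpatch below, and the shorter one is padded with NOPs up to the longer one. With assumed sizes, if the 32-bit load takes 3 instructions under fastmem and 5 under slowmem, the fastmem emission is followed by 5 - 3 = 2 HINT(HINT_NOP)s, so either variant occupies the same 5 * 4 = 20 bytes and HandleFault can overwrite one with the other without moving any surrounding code.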
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
{
if (access_address < (uintptr_t)Memory::base)
{
ERROR_LOG(DYNA_REC, "Exception handler - access below memory space. PC: 0x%016llx 0x%016lx < 0x%016lx", ctx->CTX_PC, access_address, (uintptr_t)Memory::base);
DoBacktrace(access_address, ctx);
return false;
}
if (!IsInSpace((u8*)ctx->CTX_PC))
{
ERROR_LOG(DYNA_REC, "Backpatch location not within codespace 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC));
DoBacktrace(access_address, ctx);
return false;
}
ARM64Reg reg = INVALID_REG;
u32 flags = 0;
if (!DisasmLoadStore((const u8*)ctx->CTX_PC, &flags, &reg))
{
ERROR_LOG(DYNA_REC, "Error disassembling address 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC));
DoBacktrace(access_address, ctx);
return false;
}
BackPatchInfo& info = m_backpatch_info[flags];
ARM64XEmitter emitter((u8*)(ctx->CTX_PC - info.m_fastmem_trouble_inst_offset * 4));
u64 new_pc = (u64)emitter.GetCodePtr();
// Slowmem routine doesn't need the address location
// It is already in the correct location
EmitBackpatchRoutine(&emitter, flags, false, true, reg, INVALID_REG);
emitter.FlushIcache();
ctx->CTX_PC = new_pc;
// Wipe the top bits of the addr_register
if (flags & BackPatchInfo::FLAG_STORE)
ctx->CTX_REG(1) &= 0xFFFFFFFFUll;
else
ctx->CTX_REG(0) &= 0xFFFFFFFFUll;
return true;
}
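
The final masking deserves a note: the fastmem path's MOVK spliced the upper bits of Memory::base into the address register (X0 for loads, X1 for stores, by the conventions in this file), while the freshly patched slowmem code expects a plain 32-bit guest address there. Schematically, for a store:

// Before: X1 = (upper bits of Memory::base) | guest_address
// After:  X1 = guest_address, as the Memory::Write_UXX helpers expect
ctx->CTX_REG(1) &= 0xFFFFFFFFUll;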
void JitArm64::InitBackpatch()
{
u32 flags = 0;
BackPatchInfo info;
u8* code_base = GetWritableCodePtr();
u8* code_end;
// Loads
{
// 8bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - Extend
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_EXTEND;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - Reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit - Reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
// Stores
{
// 8bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, W0, X1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
}
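
Every block above repeats the same measure-emit-rewind dance. Purely as an illustration (this helper is hypothetical, not part of the commit), the pattern could be factored as:

// Hypothetical consolidation of the repeated pattern above.
void JitArm64::MeasureBackpatchRoutine(u32 flags)
{
	BackPatchInfo info;
	u8* code_base = GetWritableCodePtr();

	// Slowmem variant: emit, record its size in instructions, rewind.
	EmitBackpatchRoutine(this, flags, false, false, W0, X1);
	info.m_slowmem_size = (GetWritableCodePtr() - code_base) / 4;
	SetCodePtr(code_base);

	// Fastmem variant: also records which instruction can fault.
	info.m_fastmem_trouble_inst_offset =
		EmitBackpatchRoutine(this, flags, true, false, W0, X1);
	info.m_fastmem_size = (GetWritableCodePtr() - code_base) / 4;
	SetCodePtr(code_base);

	m_backpatch_info[flags] = info;
}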


@@ -17,6 +17,416 @@ using namespace Arm64Gen;
void JitArm64::icbi(UGeckoInstruction inst)
{
gpr.Flush(FlushMode::FLUSH_ALL);
fpr.Flush(FlushMode::FLUSH_ALL);
FallBackToInterpreter(inst);
WriteExit(js.compilerPC + 4);
}
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
// Make sure we don't get LR as a temp register
gpr.Lock(W0, W30);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg);
ARM64Reg dest_reg = gpr.R(dest);
ARM64Reg up_reg = INVALID_REG;
ARM64Reg off_reg = INVALID_REG;
if (addr != -1 && !gpr.IsImm(addr))
up_reg = gpr.R(addr);
if (offsetReg != -1 && !gpr.IsImm(offsetReg))
off_reg = gpr.R(offsetReg);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 ignore_mask(0);
regs_in_use[W0] = 0;
regs_in_use[W30] = 0;
ignore_mask[dest_reg] = 1;
ARM64Reg addr_reg = W0;
u32 imm_addr = 0;
bool is_immediate = false;
if (offsetReg == -1)
{
if (addr != -1)
{
if (gpr.IsImm(addr))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + offset;
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, up_reg);
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (addr != -1)
{
if (gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg);
}
else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
{
MOVI2R(addr_reg, gpr.GetImm(addr));
ADD(addr_reg, addr_reg, off_reg);
}
else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
MOVI2R(addr_reg, gpr.GetImm(offsetReg));
ADD(addr_reg, addr_reg, up_reg);
}
else
{
ADD(addr_reg, up_reg, off_reg);
}
}
else
{
if (gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else
{
MOV(addr_reg, off_reg);
}
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
if (is_immediate && Memory::IsRAMAddress(imm_addr))
{
EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
if (update)
MOVI2R(up_reg, imm_addr);
}
else
{
if (update)
MOV(up_reg, addr_reg);
// Has a chance of being backpatched, which will destroy our state,
// so push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
dest_reg, XA);
ABI_PopRegisters(regs_in_use, ignore_mask);
}
gpr.Unlock(W0, W30);
}
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
{
// Make sure we don't get LR as a temp register
gpr.Lock(W0, W1, W30);
ARM64Reg RS = gpr.R(value);
ARM64Reg reg_dest = INVALID_REG;
ARM64Reg reg_off = INVALID_REG;
if (regOffset != -1 && !gpr.IsImm(regOffset))
reg_off = gpr.R(regOffset);
if (dest != -1 && !gpr.IsImm(dest))
reg_dest = gpr.R(dest);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
regs_in_use[W1] = 0;
regs_in_use[W30] = 0;
ARM64Reg addr_reg = W1;
u32 imm_addr = 0;
bool is_immediate = false;
if (regOffset == -1)
{
if (dest != -1)
{
if (gpr.IsImm(dest))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + offset;
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, reg_dest);
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (dest != -1)
{
if (gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset);
}
else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset))
{
MOVI2R(addr_reg, gpr.GetImm(dest));
ADD(addr_reg, addr_reg, reg_off);
}
else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
MOVI2R(addr_reg, gpr.GetImm(regOffset));
ADD(addr_reg, addr_reg, reg_dest);
}
else
{
ADD(addr_reg, reg_dest, reg_off);
}
}
else
{
if (gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(regOffset);
}
else
{
MOV(addr_reg, reg_off);
}
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
if (is_immediate && Memory::IsRAMAddress(imm_addr))
{
EmitBackpatchRoutine(this, flags, true, false, RS, XA);
}
else
{
// Has a chance of being backpatched, which will destroy our state,
// so push and pop everything in this instance
ABI_PushRegisters(regs_in_use);
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
RS, XA);
ABI_PopRegisters(regs_in_use);
}
gpr.Unlock(W0, W1, W30);
}
void JitArm64::lXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
u32 a = inst.RA, b = inst.RB, d = inst.RD;
s32 offset = inst.SIMM_16;
s32 offsetReg = -1;
u32 flags = BackPatchInfo::FLAG_LOAD;
bool update = false;
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 55: // lwzux
update = true;
case 23: // lwzx
flags |= BackPatchInfo::FLAG_SIZE_32;
offsetReg = b;
break;
case 119: //lbzux
update = true;
case 87: // lbzx
flags |= BackPatchInfo::FLAG_SIZE_8;
offsetReg = b;
break;
case 311: // lhzux
update = true;
case 279: // lhzx
flags |= BackPatchInfo::FLAG_SIZE_16;
offsetReg = b;
break;
case 375: // lhaux
update = true;
case 343: // lhax
flags |= BackPatchInfo::FLAG_EXTEND |
BackPatchInfo::FLAG_SIZE_16;
offsetReg = b;
break;
case 534: // lwbrx
flags |= BackPatchInfo::FLAG_REVERSE |
BackPatchInfo::FLAG_SIZE_32;
break;
case 790: // lhbrx
flags |= BackPatchInfo::FLAG_REVERSE |
BackPatchInfo::FLAG_SIZE_16;
break;
}
break;
case 33: // lwzu
update = true;
case 32: // lwz
flags |= BackPatchInfo::FLAG_SIZE_32;
break;
case 35: // lbzu
update = true;
case 34: // lbz
flags |= BackPatchInfo::FLAG_SIZE_8;
break;
case 41: // lhzu
update = true;
case 40: // lhz
flags |= BackPatchInfo::FLAG_SIZE_16;
break;
case 43: // lhau
update = true;
case 42: // lha
flags |= BackPatchInfo::FLAG_EXTEND |
BackPatchInfo::FLAG_SIZE_16;
break;
}
FALLBACK_IF(update);
SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update);
// LWZ idle skipping
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
inst.OPCD == 32 &&
(inst.hex & 0xFFFF0000) == 0x800D0000 &&
(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
{
// if it's still 0, we can wait until the next event
FixupBranch noIdle = CBNZ(gpr.R(d));
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(XA, (u64)&PowerPC::OnIdle);
MOVI2R(W0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
BLR(XA);
gpr.Unlock(WA);
WriteExceptionExit();
SetJumpTarget(noIdle);
//js.compilerPC += 8;
return;
}
}
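
For reference, the three guest words matched by the idle-skip check decode to the canonical PowerPC idle loop (shown as comments, since the match is on raw instruction words):

// 0x800Dxxxx: lwz    r0, SIMM(r13)  ; poll a flag in the small data area
// 0x28000000: cmplwi r0, 0          ; (0x2C000000, cmpwi, on Wii)
// 0x4182fff8: beq    -8             ; loop back to the lwz while equal
// When r0 loads as zero, PowerPC::OnIdle fast-forwards to the next
// event instead of letting the JIT spin through the loop.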
void JitArm64::stX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
u32 a = inst.RA, b = inst.RB, s = inst.RS;
s32 offset = inst.SIMM_16;
s32 regOffset = -1;
u32 flags = BackPatchInfo::FLAG_STORE;
bool update = false;
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 183: // stwux
update = true;
case 151: // stwx
flags |= BackPatchInfo::FLAG_SIZE_32;
regOffset = b;
break;
case 247: // stbux
update = true;
case 215: // stbx
flags |= BackPatchInfo::FLAG_SIZE_8;
regOffset = b;
break;
case 439: // sthux
update = true;
case 407: // sthx
flags |= BackPatchInfo::FLAG_SIZE_16;
regOffset = b;
break;
}
break;
case 37: // stwu
update = true;
case 36: // stw
flags |= BackPatchInfo::FLAG_SIZE_32;
break;
case 39: // stbu
update = true;
case 38: // stb
flags |= BackPatchInfo::FLAG_SIZE_8;
break;
case 45: // sthu
update = true;
case 44: // sth
flags |= BackPatchInfo::FLAG_SIZE_16;
break;
}
SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset);
if (update)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg RB;
ARM64Reg RA = gpr.R(a);
if (regOffset != -1)
RB = gpr.R(regOffset);
if (regOffset == -1)
{
MOVI2R(WA, offset);
ADD(RA, RA, WA);
}
else
{
ADD(RA, RA, RB);
}
gpr.Unlock(WA);
}
}


@@ -29,7 +29,7 @@ ARM64Reg Arm64RegCache::GetReg()
}
// Holy cow, how did you run out of registers?
// We can't return anything reasonable in this case. Return INVALID_REG and watch the failure happen
_assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb");
WARN_LOG(DYNA_REC, "All available registers are locked dumb dumb");
return INVALID_REG;
}
@@ -45,18 +45,14 @@ u32 Arm64RegCache::GetUnlockedRegisterCount()
void Arm64RegCache::LockRegister(ARM64Reg host_reg)
{
auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
if (reg == m_host_registers.end())
_assert_msg_(DYNA_REC, false, "Don't try locking a register that isn't in the cache");
_assert_msg_(DYNA_REC, !reg->IsLocked(), "This register is already locked");
_assert_msg_(DYNA_REC, reg != m_host_registers.end(), "Don't try locking a register that isn't in the cache");
reg->Lock();
}
void Arm64RegCache::UnlockRegister(ARM64Reg host_reg)
{
auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
if (reg == m_host_registers.end())
_assert_msg_(DYNA_REC, false, "Don't try unlocking a register that isn't in the cache");
_assert_msg_(DYNA_REC, reg->IsLocked(), "This register is already unlocked");
_assert_msg_(DYNA_REC, reg != m_host_registers.end(), "Don't try unlocking a register that isn't in the cache");
reg->Unlock();
}
@@ -75,17 +71,19 @@ bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg)
return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end();
}
void Arm64GPRCache::FlushRegister(u32 preg)
void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state)
{
OpArg& reg = m_guest_registers[preg];
if (reg.GetType() == REG_REG)
{
ARM64Reg host_reg = reg.GetReg();
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
UnlockRegister(host_reg);
reg.Flush();
if (!maintain_state)
{
UnlockRegister(host_reg);
reg.Flush();
}
}
else if (reg.GetType() == REG_IMM)
{
@@ -103,7 +101,8 @@ void Arm64GPRCache::FlushRegister(u32 preg)
UnlockRegister(host_reg);
}
reg.Flush();
if (!maintain_state)
reg.Flush();
}
}
@@ -126,12 +125,12 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
// Has to be flushed if it isn't in a callee saved register
ARM64Reg host_reg = m_guest_registers[i].GetReg();
if (flush || !IsCalleeSaved(host_reg))
FlushRegister(i);
FlushRegister(i, mode == FLUSH_MAINTAIN_STATE);
}
else if (m_guest_registers[i].GetType() == REG_IMM)
{
if (flush)
FlushRegister(i);
FlushRegister(i, mode == FLUSH_MAINTAIN_STATE);
}
}
}
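
FLUSH_MAINTAIN_STATE exists for conditional exits: guest values are stored back to ppcState so the exit path sees coherent memory, but the host-register bindings survive for the fall-through path. The idle-skip code in lXX above is the pattern this serves; schematically:

FixupBranch noIdle = CBNZ(gpr.R(d));
gpr.Flush(FLUSH_MAINTAIN_STATE); // values stored, bindings kept
// ... emit the exit-to-dispatcher path ...
SetJumpTarget(noIdle);           // fall-through still has d in a register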
@@ -166,7 +165,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
}
break;
default:
_dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!");
ERROR_LOG(DYNA_REC, "Invalid OpArg Type!");
break;
}
// We've got an issue if we end up here
@@ -177,18 +176,35 @@ void Arm64GPRCache::SetImmediate(u32 preg, u32 imm)
{
OpArg& reg = m_guest_registers[preg];
if (reg.GetType() == REG_REG)
Unlock(reg.GetReg());
UnlockRegister(reg.GetReg());
reg.LoadToImm(imm);
}
void Arm64GPRCache::BindToRegister(u32 preg, bool do_load)
{
OpArg& reg = m_guest_registers[preg];
if (reg.GetType() == REG_NOTLOADED)
{
ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg);
if (do_load)
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
}
}
void Arm64GPRCache::GetAllocationOrder()
{
// Callee saved registers first in hopes that we will keep everything stored there first
const std::vector<ARM64Reg> allocation_order =
{
// Callee saved
W28, W27, W26, W25, W24, W23, W22, W21, W20,
W19, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9,
W10, W11, W12, W13, W14, W15, W16, W17, W18,
W19,
// Caller saved
W18, W17, W16, W15, W14, W13, W12, W11, W10,
W9, W8, W7, W6, W5, W4, W3, W2, W1, W0,
W30,
};
@@ -210,7 +226,29 @@ void Arm64GPRCache::FlushMostStaleRegister()
most_stale_amount = last_used;
}
}
FlushRegister(most_stale_preg);
FlushRegister(most_stale_preg, false);
}
BitSet32 Arm64GPRCache::GetCallerSavedUsed()
{
BitSet32 registers(0);
for (auto& it : m_host_registers)
if (it.IsLocked() && !IsCalleeSaved(it.GetReg()))
registers[it.GetReg()] = 1;
return registers;
}
void Arm64GPRCache::FlushByHost(ARM64Reg host_reg)
{
for (int i = 0; i < 32; ++i)
{
OpArg& reg = m_guest_registers[i];
if (reg.GetType() == REG_REG && reg.GetReg() == host_reg)
{
FlushRegister(i, false);
return;
}
}
}
// FPR Cache
@@ -243,3 +281,8 @@ void Arm64FPRCache::FlushMostStaleRegister()
// XXX: Flush a register
}
void Arm64FPRCache::FlushByHost(ARM64Reg host_reg)
{
// XXX: Scan guest registers and flush if found
}


@@ -144,6 +144,7 @@ public:
{
for (T reg : {args...})
{
FlushByHost(reg);
LockRegister(reg);
}
}
@@ -155,6 +156,7 @@ public:
{
for (T reg : {args...})
{
FlushByHost(reg);
UnlockRegister(reg);
}
}
@@ -172,6 +174,9 @@ protected:
// Unlock a register
void UnlockRegister(ARM64Reg host_reg);
// Flushes a guest register by host provided
virtual void FlushByHost(ARM64Reg host_reg) = 0;
// Get available host registers
u32 GetUnlockedRegisterCount();
@@ -208,6 +213,12 @@ public:
// Gets the immediate that a register is set to
u32 GetImm(u32 reg) { return m_guest_registers[reg].GetImm(); }
void BindToRegister(u32 preg, bool do_load);
void StoreRegister(u32 preg) { FlushRegister(preg, false); }
BitSet32 GetCallerSavedUsed();
protected:
// Get the order of the host registers
void GetAllocationOrder();
@@ -215,6 +226,9 @@ protected:
// Flushes the most stale register
void FlushMostStaleRegister();
// Flushes a guest register by host provided
void FlushByHost(ARM64Reg host_reg) override;
// Our guest GPRs
// PowerPC has 32 GPRs
OpArg m_guest_registers[32];
@@ -228,7 +242,7 @@ private:
reg.IncrementLastUsed();
}
void FlushRegister(u32 preg);
void FlushRegister(u32 preg, bool maintain_state);
};
class Arm64FPRCache : public Arm64RegCache
@@ -249,6 +263,9 @@ protected:
// Flushes the most stale register
void FlushMostStaleRegister();
// Flushes a guest register by host provided
void FlushByHost(ARM64Reg host_reg) override;
// Our guest FPRs
// Gekko has 32 paired registers(32x2)
OpArg m_guest_registers[32][2];


@@ -65,21 +65,21 @@ static GekkoOPTemplate primarytable[] =
{28, &JitArm64::arith_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{29, &JitArm64::arith_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{32, &JitArm64::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{33, &JitArm64::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{34, &JitArm64::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{35, &JitArm64::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{40, &JitArm64::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{41, &JitArm64::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{42, &JitArm64::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{43, &JitArm64::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{32, &JitArm64::lXX}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{33, &JitArm64::lXX}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{34, &JitArm64::lXX}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{35, &JitArm64::lXX}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{40, &JitArm64::lXX}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{41, &JitArm64::lXX}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{42, &JitArm64::lXX}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{43, &JitArm64::lXX}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{44, &JitArm64::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{45, &JitArm64::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{36, &JitArm64::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{37, &JitArm64::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{38, &JitArm64::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{39, &JitArm64::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{44, &JitArm64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{45, &JitArm64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{36, &JitArm64::stX}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{37, &JitArm64::stX}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{38, &JitArm64::stX}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{39, &JitArm64::stX}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},


@@ -0,0 +1,26 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include "Common/CommonTypes.h"
struct BackPatchInfo
{
enum
{
FLAG_STORE = (1 << 0),
FLAG_LOAD = (1 << 1),
FLAG_SIZE_8 = (1 << 2),
FLAG_SIZE_16 = (1 << 3),
FLAG_SIZE_32 = (1 << 4),
FLAG_SIZE_F32 = (1 << 5),
FLAG_SIZE_F64 = (1 << 6),
FLAG_REVERSE = (1 << 7),
FLAG_EXTEND = (1 << 8),
};
u32 m_fastmem_size;
u32 m_fastmem_trouble_inst_offset;
u32 m_slowmem_size;
};
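
As a closing usage sketch, a routine's key in m_backpatch_info is simply the OR of its flags; a sign-extending halfword load (the lha/lhax family) is looked up as:

u32 flags = BackPatchInfo::FLAG_LOAD |
            BackPatchInfo::FLAG_SIZE_16 |
            BackPatchInfo::FLAG_EXTEND;
BackPatchInfo& info = m_backpatch_info[flags]; // sizes filled in by InitBackpatch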