Merge pull request #1980 from Sonicadvance1/AArch64_more_optimizations

[AArch64] Minor optimizations
This commit is contained in:
Ryan Houdek 2015-01-30 08:33:19 -06:00
commit a277172b49
8 changed files with 322 additions and 22 deletions

View File

@ -597,6 +597,29 @@ public:
// ABI related // ABI related
void ABI_PushRegisters(BitSet32 registers); void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0)); void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
// Utility to generate a call to a std::function object.
//
// Unfortunately, calling operator() directly is undefined behavior in C++
// (this method might be a thunk in the case of multi-inheritance) so we
// have to go through a trampoline function.
//
// f:    the std::function to invoke (must not be null).
// args: forwarded by value to *f.
// Returns whatever *f returns. The result has to be propagated rather than
// dropped: code that branches to this trampoline may read the return value
// out of the result register once the call comes back.
template <typename T, typename... Args>
static T CallLambdaTrampoline(const std::function<T(Args...)>* f,
                              Args... args)
{
    // "return <void expression>;" is well-formed, so this also covers T = void.
    return (*f)(args...);
}
// Emits the setup for calling the std::function pointed to by f through
// CallLambdaTrampoline: the trampoline's address is loaded into X30 and f
// itself into X0. Any further arguments must already have been set up by
// the caller, who is expected to branch to the returned register.
// Overwrites X0 and X30.
template <typename T, typename... Args>
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
{
    auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
    const u64 trampoline_addr = reinterpret_cast<u64>(trampoline);
    const u64 functor_addr = reinterpret_cast<u64>(f);

    MOVI2R(X30, trampoline_addr);
    MOVI2R(X0, functor_addr);
    return X30;
}
}; };
class ARM64FloatEmitter class ARM64FloatEmitter

View File

@ -237,6 +237,7 @@ elseif(_M_ARM_64)
PowerPC/JitArm64/JitArm64_Paired.cpp PowerPC/JitArm64/JitArm64_Paired.cpp
PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
PowerPC/JitArm64/JitArm64_SystemRegisters.cpp PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
PowerPC/JitArm64/Jit_Util.cpp
PowerPC/JitArm64/JitArm64_Tables.cpp) PowerPC/JitArm64/JitArm64_Tables.cpp)
endif() endif()

View File

@ -180,10 +180,7 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
else if (flags & BackPatchInfo::FLAG_SIZE_16) else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->STRH(INDEX_UNSIGNED, temp, addr, 0); emit->STRH(INDEX_UNSIGNED, temp, addr, 0);
else else
{
emit->STRB(INDEX_UNSIGNED, RS, addr, 0); emit->STRB(INDEX_UNSIGNED, RS, addr, 0);
emit->HINT(HINT_NOP);
}
} }
else else
{ {

View File

@ -7,9 +7,12 @@
#include "Core/Core.h" #include "Core/Core.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/HW/MMIO.h"
#include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/Jit_Util.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h" #include "Core/PowerPC/JitArm64/JitAsm.h"
@ -42,10 +45,9 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 ignore_mask(0);
regs_in_use[W0] = 0; regs_in_use[W0] = 0;
regs_in_use[W30] = 0; regs_in_use[W30] = 0;
ignore_mask[dest_reg] = 1; regs_in_use[dest_reg] = 0;
ARM64Reg addr_reg = W0; ARM64Reg addr_reg = W0;
u32 imm_addr = 0; u32 imm_addr = 0;
@ -149,6 +151,12 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
{ {
EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA); EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
} }
else if (is_immediate && MMIO::IsMMIOAddress(imm_addr))
{
MMIOLoadToReg(Memory::mmio_mapping, this,
regs_in_use, fprs_in_use, dest_reg,
imm_addr, flags);
}
else else
{ {
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
@ -160,7 +168,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
dest_reg, XA); dest_reg, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use); m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use, ignore_mask); ABI_PopRegisters(regs_in_use);
} }
gpr.Unlock(W0, W30); gpr.Unlock(W0, W30);
@ -280,15 +288,24 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
ARM64Reg XA = EncodeRegTo64(addr_reg); ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
if (is_immediate && Memory::IsRAMAddress(imm_addr)) if (is_immediate && Memory::IsRAMAddress(imm_addr))
{ {
MOVI2R(XA, imm_addr);
EmitBackpatchRoutine(this, flags, true, false, RS, XA); EmitBackpatchRoutine(this, flags, true, false, RS, XA);
} }
else if (is_immediate && MMIO::IsMMIOAddress(imm_addr) &&
!(flags & BackPatchInfo::FLAG_REVERSE))
{
MMIOWriteRegToAddr(Memory::mmio_mapping, this,
regs_in_use, fprs_in_use, RS,
imm_addr, flags);
}
else else
{ {
if (is_immediate)
MOVI2R(XA, imm_addr);
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance // push and pop everything in this instance
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);

View File

@ -176,11 +176,10 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 fpr_ignore_mask(0);
regs_in_use[W0] = 0; regs_in_use[W0] = 0;
regs_in_use[W30] = 0; regs_in_use[W30] = 0;
fprs_in_use[0] = 0; // Q0 fprs_in_use[0] = 0; // Q0
fpr_ignore_mask[VD - Q0] = 1; fprs_in_use[VD - Q0] = 0;
if (is_immediate && Memory::IsRAMAddress(imm_addr)) if (is_immediate && Memory::IsRAMAddress(imm_addr))
{ {
@ -196,7 +195,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
VD, XA); VD, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use, fpr_ignore_mask); m_float_emit.ABI_PopRegisters(fprs_in_use);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
} }

View File

@ -273,8 +273,7 @@ void JitArm64AsmRoutineManager::GenerateCommon()
float_emit.REV32(8, D0, D0); float_emit.REV32(8, D0, D0);
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32); MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
float_emit.ST1(32, Q0, 0, addr_reg, SP); float_emit.ST1(64, Q0, 0, addr_reg, SP);
float_emit.ST1(32, Q0, 1, addr_reg, SP);
RET(X30); RET(X30);
SetJumpTarget(argh); SetJumpTarget(argh);
@ -304,9 +303,9 @@ void JitArm64AsmRoutineManager::GenerateCommon()
TST(DecodeReg(addr_reg), 6, 1); TST(DecodeReg(addr_reg), 6, 1);
FixupBranch argh = B(CC_NEQ); FixupBranch argh = B(CC_NEQ);
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32); MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
float_emit.ST1(8, Q0, 0, addr_reg, SP); float_emit.ST1(16, Q0, 0, addr_reg, SP);
float_emit.ST1(8, Q0, 1, addr_reg, SP);
RET(X30); RET(X30);
SetJumpTarget(argh); SetJumpTarget(argh);
@ -335,9 +334,9 @@ void JitArm64AsmRoutineManager::GenerateCommon()
TST(DecodeReg(addr_reg), 6, 1); TST(DecodeReg(addr_reg), 6, 1);
FixupBranch argh = B(CC_NEQ); FixupBranch argh = B(CC_NEQ);
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32); MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
float_emit.ST1(8, Q0, 0, addr_reg, SP); float_emit.ST1(16, Q0, 0, addr_reg, SP);
float_emit.ST1(8, Q0, 1, addr_reg, SP);
RET(X30); RET(X30);
SetJumpTarget(argh); SetJumpTarget(argh);
@ -368,8 +367,7 @@ void JitArm64AsmRoutineManager::GenerateCommon()
TST(DecodeReg(addr_reg), 6, 1); TST(DecodeReg(addr_reg), 6, 1);
FixupBranch argh = B(CC_NEQ); FixupBranch argh = B(CC_NEQ);
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32); MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
float_emit.ST1(16, Q0, 0, addr_reg, SP); float_emit.ST1(32, Q0, 0, addr_reg, SP);
float_emit.ST1(16, Q0, 1, addr_reg, SP);
RET(X30); RET(X30);
SetJumpTarget(argh); SetJumpTarget(argh);
@ -399,8 +397,7 @@ void JitArm64AsmRoutineManager::GenerateCommon()
TST(DecodeReg(addr_reg), 6, 1); TST(DecodeReg(addr_reg), 6, 1);
FixupBranch argh = B(CC_NEQ); FixupBranch argh = B(CC_NEQ);
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32); MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
float_emit.ST1(16, Q0, 0, addr_reg, SP); float_emit.ST1(32, Q0, 0, addr_reg, SP);
float_emit.ST1(16, Q0, 1, addr_reg, SP);
RET(X30); RET(X30);
SetJumpTarget(argh); SetJumpTarget(argh);

View File

@ -0,0 +1,248 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Core/HW/MMIO.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/Jit_Util.h"
// Visitor that generates code to write a register's value to a MMIO address.
//
// T is the access type (its size selects an 8, 16 or 32 bit store).
//
// Registers used as scratch by the emitted code:
//   X0  - host pointer for direct stores; functor pointer for complex handlers
//   W1  - masked value for direct stores; MMIO address for complex handlers
//   W2  - value argument for complex handlers
//   X30 - branch target for complex handlers
// The source register therefore must not be W0 for direct stores.
template <typename T>
class MMIOWriteCodeGenerator : public MMIO::WriteHandlingMethodVisitor<T>
{
public:
    // emit:        emitter the code is generated into.
    // gprs_in_use: general purpose registers saved/restored around a
    //              complex handler call.
    // fprs_in_use: floating point registers saved/restored around a
    //              complex handler call.
    // src_reg:     register holding the value to write.
    // address:     MMIO address, passed to complex handlers.
    MMIOWriteCodeGenerator(ARM64XEmitter* emit, BitSet32 gprs_in_use, BitSet32 fprs_in_use,
                           ARM64Reg src_reg, u32 address)
        : m_emit(emit), m_gprs_in_use(gprs_in_use), m_fprs_in_use(fprs_in_use),
          m_src_reg(src_reg), m_address(address)
    {
    }

    void VisitNop() override
    {
        // Do nothing
    }

    void VisitDirect(T* addr, u32 mask) override
    {
        WriteRegToAddr(8 * sizeof (T), addr, mask);
    }

    void VisitComplex(const std::function<void(u32, T)>* lambda) override
    {
        CallLambda(8 * sizeof (T), lambda);
    }

private:
    // Emits a store of the low sbits bits of reg to the address held in X0.
    void StoreFromRegister(int sbits, ARM64Reg reg)
    {
        switch (sbits)
        {
        case 8:
            m_emit->STRB(INDEX_UNSIGNED, reg, X0, 0);
            break;
        case 16:
            m_emit->STRH(INDEX_UNSIGNED, reg, X0, 0);
            break;
        case 32:
            m_emit->STR(INDEX_UNSIGNED, reg, X0, 0);
            break;
        default:
            _assert_msg_(DYNA_REC, false, "Unknown size %d passed to MMIOWriteCodeGenerator!", sbits);
            break;
        }
    }

    // Emits a (possibly masked) store of the source register to the host
    // pointer ptr.
    void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
    {
        m_emit->MOVI2R(X0, (u64)ptr);

        // If the mask keeps every bit of the access we can store the source
        // register as is. Otherwise the value has to be masked into a
        // scratch register first and that register is stored instead.
        u32 all_ones = (1ULL << sbits) - 1;
        if ((all_ones & mask) == all_ones)
        {
            StoreFromRegister(sbits, m_src_reg);
        }
        else
        {
            m_emit->MOVI2R(W1, mask);
            m_emit->AND(W1, m_src_reg, W1, ArithOption(W1, ST_LSL, 0));
            StoreFromRegister(sbits, W1);
        }
    }

    // Emits a call to lambda(m_address, value), saving and restoring the
    // in-use registers around it.
    void CallLambda(int sbits, const std::function<void(u32, T)>* lambda)
    {
        ARM64FloatEmitter float_emit(m_emit);

        m_emit->ABI_PushRegisters(m_gprs_in_use);
        float_emit.ABI_PushRegisters(m_fprs_in_use);

        // Move the value into place before W1 is overwritten with the
        // address, otherwise the value would be lost when the source
        // register happens to be W1.
        m_emit->MOV(W2, m_src_reg);
        m_emit->MOVI2R(W1, m_address);
        // ABI_SetupLambda fills X0 (functor) and X30 (trampoline address).
        m_emit->BLR(m_emit->ABI_SetupLambda(lambda));

        float_emit.ABI_PopRegisters(m_fprs_in_use);
        m_emit->ABI_PopRegisters(m_gprs_in_use);
    }

    ARM64XEmitter* m_emit;
    BitSet32 m_gprs_in_use;
    BitSet32 m_fprs_in_use;
    ARM64Reg m_src_reg;
    u32 m_address;
};
// Visitor that emits the code needed to read a MMIO value into a register.
//
// T is the access type (its size selects an 8, 16 or 32 bit access).
// X0/W0 is used as scratch by the emitted code; complex handlers are called
// through X30 and their result is taken from W0.
template <typename T>
class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
{
public:
    // emit:        emitter the code is generated into.
    // gprs_in_use: general purpose registers saved/restored around a
    //              complex handler call.
    // fprs_in_use: floating point registers saved/restored around a
    //              complex handler call.
    // dst_reg:     register receiving the value that was read.
    // address:     MMIO address, passed to complex handlers.
    // sign_extend: sign extend the value into dst_reg when true.
    MMIOReadCodeGenerator(ARM64XEmitter* emit, BitSet32 gprs_in_use, BitSet32 fprs_in_use,
                          ARM64Reg dst_reg, u32 address, bool sign_extend)
        : m_emit(emit), m_gprs_in_use(gprs_in_use), m_fprs_in_use(fprs_in_use),
          m_dst_reg(dst_reg), m_address(address), m_sign_extend(sign_extend)
    {
    }

    void VisitConstant(T value) override
    {
        LoadConstantToReg(8 * sizeof (T), value);
    }

    void VisitDirect(const T* addr, u32 mask) override
    {
        LoadAddrMaskToReg(8 * sizeof (T), addr, mask);
    }

    void VisitComplex(const std::function<T(u32)>* lambda) override
    {
        CallLambda(8 * sizeof (T), lambda);
    }

private:
    // Emits a move of a known constant into the destination register.
    void LoadConstantToReg(int sbits, u32 value)
    {
        m_emit->MOVI2R(m_dst_reg, value);
        if (!m_sign_extend)
            return;

        m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
    }

    // Emits a load of sbits bits from the address held in X0. The load
    // itself sign extends only when requested and not vetoed by dont_extend.
    void LoadToRegister(int sbits, bool dont_extend)
    {
        const bool extend_on_load = m_sign_extend && !dont_extend;

        if (sbits == 8)
        {
            if (extend_on_load)
                m_emit->LDRSB(INDEX_UNSIGNED, m_dst_reg, X0, 0);
            else
                m_emit->LDRB(INDEX_UNSIGNED, m_dst_reg, X0, 0);
        }
        else if (sbits == 16)
        {
            if (extend_on_load)
                m_emit->LDRSH(INDEX_UNSIGNED, m_dst_reg, X0, 0);
            else
                m_emit->LDRH(INDEX_UNSIGNED, m_dst_reg, X0, 0);
        }
        else if (sbits == 32)
        {
            m_emit->LDR(INDEX_UNSIGNED, m_dst_reg, X0, 0);
        }
        else
        {
            _assert_msg_(DYNA_REC, false, "Unknown size %d passed to MMIOReadCodeGenerator!", sbits);
        }
    }

    // Emits a (possibly masked) load from the host pointer ptr.
    void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
    {
        m_emit->MOVI2R(X0, reinterpret_cast<u64>(ptr));

        // Without masking, the sign extension can be folded into the load.
        // With masking the order has to be: zero extending load, mask, then
        // sign extend if needed (1 instr vs. ~4).
        const u32 access_bits = (1ULL << sbits) - 1;
        const bool mask_is_noop = (mask & access_bits) == access_bits;
        if (mask_is_noop)
        {
            LoadToRegister(sbits, false);
            return;
        }

        LoadToRegister(sbits, true);
        m_emit->MOVI2R(W0, mask);
        m_emit->AND(m_dst_reg, m_dst_reg, W0, ArithOption(W0, ST_LSL, 0));
        if (m_sign_extend)
            m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
    }

    // Emits a call to lambda(m_address) with the in-use registers saved
    // around it, then extracts the low sbits bits of W0 into the
    // destination register.
    void CallLambda(int sbits, const std::function<T(u32)>* lambda)
    {
        ARM64FloatEmitter fp_emit(m_emit);

        m_emit->ABI_PushRegisters(m_gprs_in_use);
        fp_emit.ABI_PushRegisters(m_fprs_in_use);

        m_emit->MOVI2R(W1, m_address);
        m_emit->BLR(m_emit->ABI_SetupLambda(lambda));

        fp_emit.ABI_PopRegisters(m_fprs_in_use);
        m_emit->ABI_PopRegisters(m_gprs_in_use);

        const int top_bit = sbits - 1;
        if (!m_sign_extend)
            m_emit->UBFM(m_dst_reg, W0, 0, top_bit);
        else
            m_emit->SBFM(m_dst_reg, W0, 0, top_bit);
    }

    ARM64XEmitter* m_emit;
    BitSet32 m_gprs_in_use;
    BitSet32 m_fprs_in_use;
    ARM64Reg m_dst_reg;
    u32 m_address;
    bool m_sign_extend;
};
// Emits code that reads the MMIO register at address into dst_reg.
//
// The access width comes from the first of FLAG_SIZE_8, FLAG_SIZE_16 and
// FLAG_SIZE_32 set in flags; nothing is emitted when none of them is set.
// FLAG_EXTEND requests sign extension of the value.
void MMIOLoadToReg(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit,
                   BitSet32 gprs_in_use, BitSet32 fprs_in_use,
                   ARM64Reg dst_reg, u32 address, u32 flags)
{
    const bool sign_extend = (flags & BackPatchInfo::FLAG_EXTEND) != 0;

    if (flags & BackPatchInfo::FLAG_SIZE_8)
    {
        MMIOReadCodeGenerator<u8> visitor(emit, gprs_in_use, fprs_in_use, dst_reg,
                                          address, sign_extend);
        mmio->GetHandlerForRead<u8>(address).Visit(visitor);
        return;
    }

    if (flags & BackPatchInfo::FLAG_SIZE_16)
    {
        MMIOReadCodeGenerator<u16> visitor(emit, gprs_in_use, fprs_in_use, dst_reg,
                                           address, sign_extend);
        mmio->GetHandlerForRead<u16>(address).Visit(visitor);
        return;
    }

    if (flags & BackPatchInfo::FLAG_SIZE_32)
    {
        MMIOReadCodeGenerator<u32> visitor(emit, gprs_in_use, fprs_in_use, dst_reg,
                                           address, sign_extend);
        mmio->GetHandlerForRead<u32>(address).Visit(visitor);
    }
}
// Emits code that writes the value held in src_reg to the MMIO register at
// address.
//
// The access width comes from the first of FLAG_SIZE_8, FLAG_SIZE_16 and
// FLAG_SIZE_32 set in flags; nothing is emitted when none of them is set.
void MMIOWriteRegToAddr(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit,
                        BitSet32 gprs_in_use, BitSet32 fprs_in_use,
                        ARM64Reg src_reg, u32 address, u32 flags)
{
    if (flags & BackPatchInfo::FLAG_SIZE_8)
    {
        MMIOWriteCodeGenerator<u8> visitor(emit, gprs_in_use, fprs_in_use, src_reg,
                                           address);
        mmio->GetHandlerForWrite<u8>(address).Visit(visitor);
        return;
    }

    if (flags & BackPatchInfo::FLAG_SIZE_16)
    {
        MMIOWriteCodeGenerator<u16> visitor(emit, gprs_in_use, fprs_in_use, src_reg,
                                            address);
        mmio->GetHandlerForWrite<u16>(address).Visit(visitor);
        return;
    }

    if (flags & BackPatchInfo::FLAG_SIZE_32)
    {
        MMIOWriteCodeGenerator<u32> visitor(emit, gprs_in_use, fprs_in_use, src_reg,
                                            address);
        mmio->GetHandlerForWrite<u32>(address).Visit(visitor);
    }
}

View File

@ -0,0 +1,18 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include "Common/Arm64Emitter.h"
#include "Common/Common.h"
#include "Core/HW/MMIO.h"
// Generates code through emit that reads the MMIO register at address into
// dst_reg, using the read handler registered in mmio for that address.
//
// gprs_in_use / fprs_in_use: registers pushed before and popped after a call
//                            to a complex (lambda) handler.
// flags: BackPatchInfo flags. FLAG_SIZE_8 / FLAG_SIZE_16 / FLAG_SIZE_32
//        select the access width and FLAG_EXTEND requests sign extension.
//
// The generated code uses X0/W0 as scratch; complex handlers additionally
// use W1 and X30.
void MMIOLoadToReg(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit,
BitSet32 gprs_in_use, BitSet32 fprs_in_use,
ARM64Reg dst_reg, u32 address, u32 flags);
// Generates code through emit that writes the value in src_reg to the MMIO
// register at address, using the write handler registered in mmio for that
// address.
//
// gprs_in_use / fprs_in_use: registers pushed before and popped after a call
//                            to a complex (lambda) handler.
// flags: BackPatchInfo flags. FLAG_SIZE_8 / FLAG_SIZE_16 / FLAG_SIZE_32
//        select the access width.
//
// The generated code uses X0 and W1 as scratch; complex handlers
// additionally use W2 and X30.
void MMIOWriteRegToAddr(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit,
BitSet32 gprs_in_use, BitSet32 fprs_in_use,
ARM64Reg src_reg, u32 address, u32 flags);