Remove ARMv7 support.

This commit is contained in:
Ryan Houdek
2015-06-07 22:44:13 -05:00
parent 499478bcad
commit 59e2225f7d
36 changed files with 6 additions and 10619 deletions

View File

@ -1,513 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <map>
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PatchEngine.h"
#include "Core/HLE/HLE.h"
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_Tables.h"
using namespace ArmGen;
// Brings the JIT up: reserves the code space, initialises the block cache,
// the assembly routines and both register caches, sets the JIT/analyzer
// options, builds the backpatch tables and finally forces all JIT
// load/store paths off in the startup configuration.
void JitArm::Init()
{
	AllocCodeSpace(CODE_SIZE);

	blocks.Init();
	asm_routines.Init();
	gpr.Init(this);
	fpr.Init(this);

	jo.enableBlocklink = true;
	jo.optimizeGatherPipe = true;
	UpdateMemoryOptions();

	// Let the analyzer report its statistics and register usage into js.
	code_block.m_stats = &js.st;
	code_block.m_gpa = &js.gpa;
	code_block.m_fpa = &js.fpa;
	analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);

	InitBackpatch();

	// Disable all loadstores
	// Ever since the MMU has been optimized for x86, loadstores on ARMv7 have been knackered
	// XXX: Investigate exactly why these are broken
	auto& startup = SConfig::GetInstance().m_LocalCoreStartupParameter;
	startup.bJITLoadStoreOff = true;
	startup.bJITLoadStoreFloatingOff = true;
	startup.bJITLoadStorePairedOff = true;
}
// Throws away all generated code and every cached block, then refreshes the
// memory options.
void JitArm::ClearCache()
{
	ClearCodeSpace();
	blocks.Clear();
	UpdateMemoryOptions();
}
// Tears the JIT down: frees the code space and shuts down the block cache
// and the assembly routines.
void JitArm::Shutdown()
{
	FreeCodeSpace();
	blocks.Shutdown();
	asm_routines.Shutdown();
}
// Emits a call to the interpreter handler for `inst`.
// Both register caches are flushed first; the raw opcode is then loaded into
// R0 and the handler address into R12, which is called with BL.
// Within this file the only caller is FallBackToInterpreter().
void JitArm::WriteCallInterpreter(UGeckoInstruction inst)
{
	gpr.Flush();
	fpr.Flush();

	const Interpreter::_interpreterInstruction handler = GetInterpreterOp(inst);
	MOVI2R(R0, inst.hex);
	MOVI2R(R12, (u32)handler);
	BL(R12);
}
// Opcode handler for instructions without a JIT implementation: defers to
// WriteCallInterpreter() with the raw opcode word.
void JitArm::FallBackToInterpreter(UGeckoInstruction _inst)
{
	WriteCallInterpreter(_inst.hex);
}
// Emits a call to HLE::Execute(compilerPC, opcode) and then leaves the block
// through WriteExitDestInR() with the npc value read back from the PowerPC
// state.
void JitArm::HLEFunction(UGeckoInstruction _inst)
{
	gpr.Flush();
	fpr.Flush();

	// Arguments for HLE::Execute go in R0/R1.
	MOVI2R(R0, js.compilerPC);
	MOVI2R(R1, _inst.hex);
	QuickCallFunction(R14, (void*)&HLE::Execute);

	ARMReg next_pc = gpr.GetReg();
	LDR(next_pc, R9, PPCSTATE_OFF(npc));
	WriteExitDestInR(next_pc);
}
// Opcode handler that intentionally emits no code at all.
void JitArm::DoNothing(UGeckoInstruction _inst)
{
	// Yup, just don't do anything.
}
// Compile-time switches for the ImHere() block tracer.
static const bool ImHereDebug = false;
static const bool ImHereLog = false;
// Visit counter per PC value, maintained by ImHere().
static std::map<u32, int> been_here;

// Debug helper called from generated code when ImHereDebug is set.
// With ImHereLog set, every call appends the current PC to "log32.txt".
// A PC is reported through DEBUG_LOG the first time it is seen and again
// each time its counter reaches a multiple of 1024, after which the counter
// restarts at 1.
static void ImHere()
{
	static File::IOFile f;
	if (ImHereLog)
	{
		if (!f)
			f.Open("log32.txt", "w");
		fprintf(f.GetHandle(), "%08x\n", PC);
	}

	auto visit = been_here.find(PC);
	if (visit != been_here.end())
	{
		const int hits = ++visit->second;
		if (hits & 1023)
			return;
	}

	DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR);
	been_here[PC] = 1;
}
// Emits the end-of-block gather pipe check. Nothing is emitted unless the
// gather pipe optimisation is on and this block has written FIFO bytes.
// R0-R3 are saved around the call to GPFifo::FastCheckGatherPipe.
void JitArm::Cleanup()
{
	if (!jo.optimizeGatherPipe || js.fifoBytesThisBlock <= 0)
		return;

	PUSH(4, R0, R1, R2, R3);
	QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe);
	POP(4, R0, R1, R2, R3);
}
void JitArm::DoDownCount()
{
ARMReg rA = gpr.GetReg();
LDR(rA, R9, PPCSTATE_OFF(downcount));
if (js.downcountAmount < 255) // We can enlarge this if we used rotations
{
SUBS(rA, rA, js.downcountAmount);
}
else
{
ARMReg rB = gpr.GetReg(false);
MOVI2R(rB, js.downcountAmount);
SUBS(rA, rA, rB);
}
STR(rA, R9, PPCSTATE_OFF(downcount));
gpr.Unlock(rA);
}
// Emits a block exit whose destination PC is held in `Reg`: stores it to
// ppcState.pc, runs the common epilogue (gather pipe check, downcount,
// optional profiling stop) and branches to the dispatcher. `Reg` is reused
// for the dispatcher address and unlocked afterwards.
void JitArm::WriteExitDestInR(ARMReg Reg)
{
	STR(Reg, R9, PPCSTATE_OFF(pc));

	Cleanup();
	DoDownCount();
	if (Profiler::g_ProfileBlocks)
	{
		EndTimeProfile(js.curBlock);
	}

	MOVI2R(Reg, (u32)asm_routines.dispatcher);
	B(Reg);
	gpr.Unlock(Reg);
}
// Emits the block exit used after rfi: stores `Reg` to ppcState.pc, runs the
// common epilogue, then copies pc to npc, calls PowerPC::CheckExceptions,
// copies npc back to pc and branches to the dispatcher.
void JitArm::WriteRfiExitDestInR(ARMReg Reg)
{
	STR(Reg, R9, PPCSTATE_OFF(pc));

	Cleanup();
	DoDownCount();
	if (Profiler::g_ProfileBlocks)
	{
		EndTimeProfile(js.curBlock);
	}

	ARMReg scratch = gpr.GetReg(false);

	// npc = pc, check exceptions, pc = npc
	LDR(scratch, R9, PPCSTATE_OFF(pc));
	STR(scratch, R9, PPCSTATE_OFF(npc));
	QuickCallFunction(scratch, (void*)&PowerPC::CheckExceptions);
	LDR(scratch, R9, PPCSTATE_OFF(npc));
	STR(scratch, R9, PPCSTATE_OFF(pc));

	gpr.Unlock(Reg); // This was locked in the instruction beforehand

	MOVI2R(scratch, (u32)asm_routines.dispatcher);
	B(scratch);
}
void JitArm::WriteExceptionExit()
{
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
ARMReg A = gpr.GetReg(false);
LDR(A, R9, PPCSTATE_OFF(pc));
STR(A, R9, PPCSTATE_OFF(npc));
QuickCallFunction(A, (void*)&PowerPC::CheckExceptions);
LDR(A, R9, PPCSTATE_OFF(npc));
STR(A, R9, PPCSTATE_OFF(pc));
MOVI2R(A, (u32)asm_routines.dispatcher);
B(A);
}
// Emits a block exit to the fixed guest address `destination` and records a
// link entry for it on the current block.
// When block linking is enabled and the destination is already compiled, the
// exit branches straight to that block's checked entry; otherwise it stores
// the destination to ppcState.pc and branches to the dispatcher.
void JitArm::WriteExit(u32 destination)
{
	Cleanup();
	DoDownCount();
	if (Profiler::g_ProfileBlocks)
	{
		EndTimeProfile(js.curBlock);
	}

	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock::LinkData link;
	link.exitAddress = destination;
	link.exitPtrs = GetWritableCodePtr(); // location of the exit code emitted below
	link.linkStatus = false;

	// Link opportunity!
	const int target = jo.enableBlocklink ? blocks.GetBlockNumberFromStartAddress(destination) : -1;
	if (target >= 0)
	{
		// It exists! Joy of joy!
		B(blocks.GetBlock(target)->checkedEntry);
		link.linkStatus = true;
	}
	else
	{
		ARMReg scratch = gpr.GetReg(false);
		MOVI2R(scratch, destination);
		STR(scratch, R9, PPCSTATE_OFF(pc));
		MOVI2R(scratch, (u32)asm_routines.dispatcher);
		B(scratch);
	}

	js.curBlock->linkData.push_back(link);
}
void JitArm::Run()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
pExecAddr();
}
void JitArm::SingleStep()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
pExecAddr();
}
// Writes one DEBUG_LOG line with the current PC, SRR0, SRR1, FPSCR, MSR and
// LR (spr[8]). The general purpose registers are appended when JIT_LOG_GPR is
// defined and the floating point registers when JIT_LOG_FPR is defined.
//
// The line is tagged "JITARM", matching this core's GetName(); it used to be
// tagged "JIT64", which made ARM traces look like they came from the x86-64
// JIT.
void JitArm::Trace()
{
	std::string regs;
	std::string fregs;

#ifdef JIT_LOG_GPR
	for (int i = 0; i < 32; i++)
	{
		regs += StringFromFormat("r%02d: %08x ", i, PowerPC::ppcState.gpr[i]);
	}
#endif

#ifdef JIT_LOG_FPR
	for (int i = 0; i < 32; i++)
	{
		// NOTE(review): riPS0() presumably yields a 64-bit value, in which
		// case "%016x" does not match the argument width - confirm.
		fregs += StringFromFormat("f%02d: %016x ", i, riPS0(i));
	}
#endif

	DEBUG_LOG(DYNA_REC, "JITARM PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s",
		PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str());
}
// Compiles a block for the current PowerPC::ppcState.pc and registers it
// with the block cache. The whole cache is cleared first when less than
// 0x10000 bytes of code space remain, when the block cache is full, or when
// the block cache is disabled in the configuration.
// Note that `em_address` is not read; the address comes from ppcState.pc.
void JitArm::Jit(u32 em_address)
{
	const bool must_clear =
		GetSpaceLeft() < 0x10000 ||
		blocks.IsFull() ||
		SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache;
	if (must_clear)
		ClearCache();

	int index = blocks.AllocateBlock(PowerPC::ppcState.pc);
	JitBlock *block = blocks.GetBlock(index);
	const u8* entry = DoJit(PowerPC::ppcState.pc, &code_buffer, block);
	blocks.FinalizeBlock(index, jo.enableBlocklink, entry);
}
// Opcode handler that logs the instruction's name as an error at compile
// time and emits a BKPT 0x4444 in its place.
void JitArm::Break(UGeckoInstruction inst)
{
	ERROR_LOG(DYNA_REC, "%s called a Break instruction!", PPCTables::GetInstructionName(inst));
	BKPT(0x4444);
}
// Resets the profiling fields of `b` and emits code that programs the
// performance monitor through coprocessor 15, reads the cycle counter and
// stores it to b->ticStart.
//
// Performance counters are a bit finicky on ARM:
//  - The PMU must be enabled and programmed before it can be used.
//  - This is a per core operation, so with thread scheduling we may land on
//    a core where the PMU has not been enabled yet. Some ARM CPUs have
//    absurd core counts (48+!), so the workaround is to enable the PMU at
//    the start of every block. The counters have to be reset at the start of
//    every block anyway, so we may as well.
//  - The kernel can block access to these co-processor registers, in which
//    case these instructions generate a SIGILL.
// Refer to the ARM ARM about PMCR for what the option bits do exactly.
void JitArm::BeginTimeProfile(JitBlock* b)
{
	b->ticCounter = 0;
	b->ticStart = 0;
	b->ticStop = 0;

	enum
	{
		PERF_OPTION_ENABLE        = (1 << 0),
		PERF_OPTION_RESET_CR      = (1 << 1),
		PERF_OPTION_RESET_CCR     = (1 << 2),
		PERF_OPTION_DIVIDER_MODE  = (1 << 3),
		PERF_OPTION_EXPORT_ENABLE = (1 << 4),
	};
	const u32 perf_options = PERF_OPTION_ENABLE | PERF_OPTION_RESET_CR |
	                         PERF_OPTION_RESET_CCR | PERF_OPTION_EXPORT_ENABLE;

	// Programs the PMCR
	MOVI2R(R0, perf_options);
	MCR(15, 0, R0, 9, 12, 0);

	MOVI2R(R0, 0x8000000F);
	// Enables all counters
	MCR(15, 0, R0, 9, 12, 1);
	// Clears all counter overflows
	MCR(15, 0, R0, 9, 12, 3);

	// Gets the cycle counter and saves it as the block's start time
	MRC(15, 0, R1, 9, 13, 0);
	MOVI2R(R0, (u32)&b->ticStart);
	STR(R1, R0, 0);
}
// Emits code that reads the cycle counter, stores it to b->ticStop and then
// calls the m_increment_profile_counter assembly routine with the address of
// b->ticStart in R0.
void JitArm::EndTimeProfile(JitBlock* b)
{
	// Gets the cycle counter and saves it as the block's stop time
	MRC(15, 0, R1, 9, 13, 0);
	MOVI2R(R0, (u32)&b->ticStop);
	STR(R1, R0, 0);

	MOVI2R(R0, (u32)&b->ticStart);
	MOVI2R(R14, (u32)asm_routines.m_increment_profile_counter);
	BL(R14);
}
// Compiles the PowerPC block starting at em_address into ARM code at the
// current position of the code space.
//   em_address - guest address of the first instruction of the block.
//   code_buf   - buffer handed to the analyzer; its size is passed on as the
//                block size (forced to 1 while single-stepping under the debugger).
//   b          - block record that receives both entry points, the run count,
//                the generated code size and the original instruction count.
// Returns the start of the emitted code, which is also stored in b->checkedEntry.
const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b)
{
int blockSize = code_buf->GetSize();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{
// Comment out the following to disable breakpoints (speed-up)
if (!Profiler::g_ProfileBlocks)
{
if (PowerPC::GetState() == PowerPC::CPU_STEPPING)
blockSize = 1;
Trace();
}
}
// Compiling at address 0 is reported and the core is paused, but compilation
// still continues below.
if (em_address == 0)
{
Core::SetState(Core::CORE_PAUSE);
PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR);
}
js.isLastInstruction = false;
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
js.curBlock = b;
u32 nextPC = em_address;
// Analyze the block, collect all instructions it is made of (including inlining,
// if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize);
PPCAnalyst::CodeOp *ops = code_buf->codebuffer;
// The checked entry is the very first emitted instruction of the block.
const u8 *start = GetCodePtr();
b->checkedEntry = start;
b->runCount = 0;
// Downcount flag check, Only valid for linked blocks
// When the branch on CC_PL is not taken, the block start is written to
// ppcState.pc and control goes to the doTiming routine.
// NOTE(review): the flags tested here are presumably the ones left by the SUBS
// that DoDownCount() emits in the block that linked to this one - confirm.
{
FixupBranch no_downcount = B_CC(CC_PL);
ARMReg rA = gpr.GetReg(false);
MOVI2R(rA, js.blockStart);
STR(rA, R9, PPCSTATE_OFF(pc));
MOVI2R(rA, (u32)asm_routines.doTiming);
B(rA);
SetJumpTarget(no_downcount);
}
// The normal entry starts right after the downcount check.
const u8 *normalEntry = GetCodePtr();
b->normalEntry = normalEntry;
if (ImHereDebug)
QuickCallFunction(R14, (void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
if (js.fpa.any)
{
// This block uses FPU - needs to add FP exception bailout
// If the tested MSR bit is clear, the emitted code raises
// EXCEPTION_FPU_UNAVAILABLE, calls CheckExceptions and leaves through the
// dispatcher with pc = npc.
ARMReg A = gpr.GetReg();
ARMReg C = gpr.GetReg();
Operand2 Shift(2, 10); // 1 << 13
MOVI2R(C, js.blockStart); // R3
LDR(A, R9, PPCSTATE_OFF(msr));
TST(A, Shift);
FixupBranch no_fpe = B_CC(CC_NEQ);
STR(C, R9, PPCSTATE_OFF(pc));
LDR(A, R9, PPCSTATE_OFF(Exceptions));
ORR(A, A, EXCEPTION_FPU_UNAVAILABLE);
STR(A, R9, PPCSTATE_OFF(Exceptions));
QuickCallFunction(A, (void*)&PowerPC::CheckExceptions);
LDR(A, R9, PPCSTATE_OFF(npc));
STR(A, R9, PPCSTATE_OFF(pc));
MOVI2R(A, (u32)asm_routines.dispatcher);
B(A);
SetJumpTarget(no_fpe);
gpr.Unlock(A, C);
}
// Conditionally add profiling code.
// The emitted code increments b->runCount and starts the cycle counter
// measurement for this block.
if (Profiler::g_ProfileBlocks)
{
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
MOVI2R(rA, (u32)&b->runCount); // Load in to register
LDR(rB, rA); // Load the actual value in to rB.
ADD(rB, rB, 1); // Add one to the value
STR(rB, rA); // Now store it back in the memory location
BeginTimeProfile(b);
gpr.Unlock(rA, rB);
}
// Hand the analyzer's register usage information to both register caches.
gpr.Start(js.gpa);
fpr.Start(js.fpa);
js.downcountAmount = 0;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
js.skipInstructions = 0;
// This value is overwritten with each instruction's own address inside the loop.
js.compilerPC = nextPC;
// Translate instructions
for (u32 i = 0; i < code_block.m_num_instructions; i++)
{
js.compilerPC = ops[i].address;
js.op = &ops[i];
js.instructionNumber = i;
const GekkoOPInfo *opinfo = ops[i].opinfo;
// Cycles are accumulated for every instruction, including skipped ones.
js.downcountAmount += opinfo->numCycles;
if (i == (code_block.m_num_instructions - 1))
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
}
// Once 32 or more FIFO bytes have been counted for this block, emit a gather
// pipe check (R0-R3 saved around the call) and take 32 off the counter.
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
PUSH(4, R0, R1, R2, R3);
QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe);
POP(4, R0, R1, R2, R3);
}
if (!ops[i].skip)
{
if (jo.memcheck && (opinfo->flags & FL_USE_FPU))
{
// Don't do this yet
BKPT(0x7777);
}
JitArmTables::CompileInstruction(ops[i]);
// If we have a register that will never be used again, flush it.
for (int j : ~ops[i].gprInUse)
gpr.StoreFromRegister(j);
for (int j : ~ops[i].fprInUse)
fpr.StoreFromRegister(j);
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
{
// Don't do this yet
BKPT(0x666);
}
}
}
// Memory exceptions are not handled; a breakpoint is emitted instead.
if (code_block.m_memory_exception)
BKPT(0x500);
// A block the analyzer marked as broken gets an explicit exit to the address
// returned by Analyze().
if (code_block.m_broken)
{
printf("Broken Block going to 0x%08x\n", nextPC);
WriteExit(nextPC);
}
b->codeSize = (u32)(GetCodePtr() - start);
b->originalSize = code_block.m_num_instructions;
FlushIcache();
return start;
}

View File

@ -1,248 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
// ========================
// See comments in Jit.cpp.
// ========================
// Mystery: Capcom vs SNK 800aa278
// CR flags approach:
// * Store that "N+Z flag contains CR0" or "S+Z flag contains CR3".
// * All flag altering instructions flush this
// * A flush simply does a conditional write to the appropriate CRx.
// * If flag available, branch code can become absolutely trivial.
// Settings
// ----------
#pragma once
#include "Core/PowerPC/CPUCoreBase.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/JitArm32/JitArmCache.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
// Byte offset of PowerPC::ppcState.<elem> measured from ppcState.spr[0], so the
// result is negative for members placed before spr[0].
// NOTE(review): generated code uses these offsets with R9 as the base register,
// so R9 presumably holds the address of ppcState.spr[0] - confirm in the asm routines.
#define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0]))
// Some asserts to make sure we will be able to load everything
// spr[1023] must lie within +/-4095 bytes for LDR; ps[0][0] must lie within
// +/-1020 bytes and be a multiple of 4 for VLDR.
static_assert(PPCSTATE_OFF(spr[1023]) > -4096 && PPCSTATE_OFF(spr[1023]) < 4096, "LDR can't reach all of the SPRs");
static_assert(PPCSTATE_OFF(ps[0][0]) >= -1020 && PPCSTATE_OFF(ps[0][0]) <= 1020, "VLDR can't reach all of the FPRs");
static_assert((PPCSTATE_OFF(ps[0][0]) % 4) == 0, "VLDR requires FPRs to be 4 byte aligned");
// ARM32 JIT core. It derives from JitBase and from ArmGen::ARMCodeBlock, so the
// emitter methods are called directly on the JIT object.
class JitArm : public JitBase, public ArmGen::ARMCodeBlock
{
private:
// Cache of compiled blocks and the hand-written assembly routines
// (dispatcher, enterCode, doTiming, ...).
JitArmBlockCache blocks;
JitArmAsmRoutineManager asm_routines;
// TODO: Make arm specific versions of these, shouldn't be too hard to
// make it so we allocate some space at the start(?) of code generation
// and keep the registers in a cache. Will burn this bridge when we get to
// it.
ArmRegCache gpr;
ArmFPRCache fpr;
// Buffer the analyzer fills with the instructions of the block being compiled.
PPCAnalyst::CodeBuffer code_buffer;
// The key is the backpatch flags
std::map<u32, BackPatchInfo> m_backpatch_info;
// Emits the subtraction of the block's cycle count from ppcState.downcount.
void DoDownCount();
void Helper_UpdateCR1(ArmGen::ARMReg fpscr, ArmGen::ARMReg temp);
void SetFPException(ArmGen::ARMReg Reg, u32 Exception);
ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
// Emit the cycle counter reads that bracket a profiled block.
void BeginTimeProfile(JitBlock* b);
void EndTimeProfile(JitBlock* b);
// Rewrites the faulting fastmem access at ctx's PC into its slowmem form.
bool BackPatch(SContext* ctx);
// Decodes the load/store at ptr into backpatch flags and the registers involved.
bool DisasmLoadStore(const u8* ptr, u32* flags, ArmGen::ARMReg* rD, ArmGen::ARMReg* V1);
// Initializes the information that backpatching needs
// This is required so we know the backpatch routine sizes and trouble offsets
void InitBackpatch();
// Returns the trouble instruction offset
// Zero if it isn't a fastmem routine
u32 EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ArmGen::ARMReg RS, ArmGen::ARMReg V1 = ArmGen::ARMReg::INVALID_REG);
public:
// The analyzer code buffer is created with a size of 32000.
JitArm() : code_buffer(32000) {}
~JitArm() {}
void Init();
void Shutdown();
// Jit!
void Jit(u32 em_address);
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b);
JitBaseBlockCache *GetBlockCache() { return &blocks; }
// Fault handler entry point; forwards to BackPatch().
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
void Trace();
void ClearCache();
CommonAsmRoutinesBase *GetAsmRoutines()
{
return &asm_routines;
}
const char *GetName()
{
return "JITARM";
}
// Run!
void Run();
void SingleStep();
// Utilities for use by opcodes
void WriteExit(u32 destination);
void WriteExitDestInR(ArmGen::ARMReg Reg);
void WriteRfiExitDestInR(ArmGen::ARMReg Reg);
void WriteExceptionExit();
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();
void ComputeRC(ArmGen::ARMReg value, int cr = 0);
void ComputeRC(s32 value, int cr);
void ComputeCarry();
void ComputeCarry(bool Carry);
void GetCarryAndClear(ArmGen::ARMReg reg);
void FinalizeCarry(ArmGen::ARMReg reg);
void SafeStoreFromReg(s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset);
void SafeLoadToReg(ArmGen::ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update);
// OPCODES
void FallBackToInterpreter(UGeckoInstruction _inst);
void DoNothing(UGeckoInstruction _inst);
void HLEFunction(UGeckoInstruction _inst);
void DynaRunTable4(UGeckoInstruction _inst);
void DynaRunTable19(UGeckoInstruction _inst);
void DynaRunTable31(UGeckoInstruction _inst);
void DynaRunTable59(UGeckoInstruction _inst);
void DynaRunTable63(UGeckoInstruction _inst);
// Breakpoint handling
void Break(UGeckoInstruction _inst);
// Branch
void bx(UGeckoInstruction _inst);
void bcx(UGeckoInstruction _inst);
void bclrx(UGeckoInstruction _inst);
void sc(UGeckoInstruction _inst);
void rfi(UGeckoInstruction _inst);
void bcctrx(UGeckoInstruction _inst);
// Integer
void arith(UGeckoInstruction _inst);
void addex(UGeckoInstruction _inst);
void subfic(UGeckoInstruction _inst);
void cntlzwx(UGeckoInstruction _inst);
void cmp (UGeckoInstruction _inst);
void cmpl(UGeckoInstruction _inst);
void cmpi(UGeckoInstruction _inst);
void cmpli(UGeckoInstruction _inst);
void negx(UGeckoInstruction _inst);
void mulhwux(UGeckoInstruction _inst);
void rlwimix(UGeckoInstruction _inst);
void rlwinmx(UGeckoInstruction _inst);
void rlwnmx(UGeckoInstruction _inst);
void srawix(UGeckoInstruction _inst);
void extshx(UGeckoInstruction inst);
void extsbx(UGeckoInstruction inst);
// System Registers
void mtmsr(UGeckoInstruction _inst);
void mfmsr(UGeckoInstruction _inst);
void mtspr(UGeckoInstruction _inst);
void mfspr(UGeckoInstruction _inst);
void mftb(UGeckoInstruction _inst);
void mcrf(UGeckoInstruction _inst);
void mtsr(UGeckoInstruction _inst);
void mfsr(UGeckoInstruction _inst);
void twx(UGeckoInstruction _inst);
// LoadStore
void stX(UGeckoInstruction _inst);
void lXX(UGeckoInstruction _inst);
void lmw(UGeckoInstruction _inst);
void stmw(UGeckoInstruction _inst);
void icbi(UGeckoInstruction _inst);
void dcbst(UGeckoInstruction _inst);
// Floating point
void fabsx(UGeckoInstruction _inst);
void fnabsx(UGeckoInstruction _inst);
void fnegx(UGeckoInstruction _inst);
void faddsx(UGeckoInstruction _inst);
void faddx(UGeckoInstruction _inst);
void fsubsx(UGeckoInstruction _inst);
void fsubx(UGeckoInstruction _inst);
void fmulsx(UGeckoInstruction _inst);
void fmulx(UGeckoInstruction _inst);
void fmrx(UGeckoInstruction _inst);
void fmaddsx(UGeckoInstruction _inst);
void fmaddx(UGeckoInstruction _inst);
void fctiwx(UGeckoInstruction _inst);
void fctiwzx(UGeckoInstruction _inst);
void fnmaddx(UGeckoInstruction _inst);
void fnmaddsx(UGeckoInstruction _inst);
void fresx(UGeckoInstruction _inst);
void fselx(UGeckoInstruction _inst);
void frsqrtex(UGeckoInstruction _inst);
// Floating point loadStore
void lfXX(UGeckoInstruction _inst);
void stfXX(UGeckoInstruction _inst);
// Paired Singles
void ps_add(UGeckoInstruction _inst);
void ps_div(UGeckoInstruction _inst);
void ps_res(UGeckoInstruction _inst);
void ps_sum0(UGeckoInstruction _inst);
void ps_sum1(UGeckoInstruction _inst);
void ps_madd(UGeckoInstruction _inst);
void ps_nmadd(UGeckoInstruction _inst);
void ps_msub(UGeckoInstruction _inst);
void ps_nmsub(UGeckoInstruction _inst);
void ps_madds0(UGeckoInstruction _inst);
void ps_madds1(UGeckoInstruction _inst);
void ps_sub(UGeckoInstruction _inst);
void ps_mul(UGeckoInstruction _inst);
void ps_muls0(UGeckoInstruction _inst);
void ps_muls1(UGeckoInstruction _inst);
void ps_merge00(UGeckoInstruction _inst);
void ps_merge01(UGeckoInstruction _inst);
void ps_merge10(UGeckoInstruction _inst);
void ps_merge11(UGeckoInstruction _inst);
void ps_mr(UGeckoInstruction _inst);
void ps_neg(UGeckoInstruction _inst);
void ps_abs(UGeckoInstruction _inst);
void ps_nabs(UGeckoInstruction _inst);
void ps_rsqrte(UGeckoInstruction _inst);
void ps_sel(UGeckoInstruction _inst);
// LoadStore paired
void psq_l(UGeckoInstruction _inst);
void psq_lx(UGeckoInstruction _inst);
void psq_st(UGeckoInstruction _inst);
void psq_stx(UGeckoInstruction _inst);
};

View File

@ -1,36 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
// Enable define below to enable oprofile integration. For this to work,
// it requires at least oprofile version 0.9.4, and changing the build
// system to link the Dolphin executable against libopagent. Since the
// dependency is a little inconvenient and this is possibly a slight
// performance hit, it's not enabled by default, but it's useful for
// locating performance issues.
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArmCache.h"
using namespace ArmGen;
// Links a block exit: overwrites the code at `location` with a branch to
// `address` and flushes the instruction cache for the written code.
void JitArmBlockCache::WriteLinkBlock(u8* location, const u8* address)
{
	ARMXEmitter patcher(location);
	patcher.B(address);
	patcher.FlushIcache();
}
void JitArmBlockCache::WriteDestroyBlock(const u8* location, u32 address)
{
ARMXEmitter emit((u8 *)location);
emit.MOVI2R(R11, address);
emit.MOVI2R(R12, (u32)jit->GetAsmRoutines()->dispatcher);
emit.STR(R11, R9, PPCSTATE_OFF(pc));
emit.B(R12);
emit.FlushIcache();
}

View File

@ -1,17 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/JitCommon/JitCache.h"
// Pointer to a generated-code entry point that takes no arguments and returns nothing.
using CompiledCode = void (*)();
// Block cache for the ARM32 JIT. It adds the two code-patching primitives on
// top of JitBaseBlockCache.
class JitArmBlockCache : public JitBaseBlockCache
{
private:
	// Overwrites the code at `location` with a branch to `address`.
	void WriteLinkBlock(u8* location, const u8* address);

	// Overwrites the code at `location` with a stub that sets pc to `address`
	// and jumps to the dispatcher.
	void WriteDestroyBlock(const u8* location, u32 address);
};

View File

@ -1,707 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <string>
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitArm32/Jit.h"
using namespace ArmGen;
// This generates some fairly heavy trampolines, but:
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game.
// Decodes the ARM load/store instruction at `ptr` so it can be backpatched.
//   ptr   - address of the instruction; neighbouring instruction words between
//           ptr - 8 and ptr + 12 are also read, depending on the opcode.
//   flags - BackPatchInfo flags are OR-ed into *flags, so the caller must
//           initialise it.
//   rD    - receives the data register of the access. It defaults to bits 12-15
//           of the instruction and is overridden for the cases below where the
//           value goes through a temporary.
//   V1    - only written for 32-bit VLD1 loads, where it receives the second
//           destination register.
// Returns false (after printing the opcode) when the instruction is not a
// recognised load/store, true otherwise.
bool JitArm::DisasmLoadStore(const u8* ptr, u32* flags, ARMReg* rD, ARMReg* V1)
{
u32 inst = *(u32*)ptr;
u32 prev_inst = *(u32*)(ptr - 4);
u32 next_inst = *(u32*)(ptr + 4);
// Bits 20-27 select the integer load/store form.
u8 op = (inst >> 20) & 0xFF;
*rD = (ARMReg)((inst >> 12) & 0xF);
switch (op)
{
case 0b01011000: // STR(imm)
case 0b01111000: // STR(register)
{
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
// The source register is taken from the low four bits of the preceding
// instruction (the REV that EmitBackpatchRoutine emits before the store).
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
case 0b01011001: // LDR(imm)
case 0b01111001: // LDR(register)
{
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
// REV
// FLAG_REVERSE is set when the load is NOT followed by a REV.
if ((next_inst & 0x0FFF0FF0) != 0x06BF0F30)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0b00011101: // LDRH(imm)
case 0b00011001: // LDRH(register)
{
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
// REV16
// FLAG_REVERSE is set when the load is NOT followed by a REV16.
if((next_inst & 0x0FFF0FF0) != 0x06BF0FB0)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0b01011101: // LDRB(imm)
case 0b01111101: // LDRB(register)
{
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
}
break;
case 0b01011100: // STRB(imm)
case 0b01111100: // STRB(register)
{
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
// Byte stores have no preceding swap, so the register comes from the store
// itself (same value as the default assigned above).
*rD = (ARMReg)((inst >> 12) & 0xF);
}
break;
case 0b00011100: // STRH(imm)
case 0b00011000: // STRH(register)
{
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
// As for 32-bit stores, the source register comes from the preceding instruction.
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
default:
{
// Could be a floating point loadstore
// These are told apart by bits 24-27 instead.
u8 op2 = (inst >> 24) & 0xF;
switch (op2)
{
case 0xD: // VLDR/VSTR
{
bool load = (inst >> 20) & 1;
bool single = !((inst >> 8) & 1);
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (single)
*flags |= BackPatchInfo::FLAG_SIZE_F32;
else
*flags |= BackPatchInfo::FLAG_SIZE_F64;
if (single)
{
if (!load)
{
// Single precision stores go through a temporary; the real source register
// is read from the instruction two words back (the VCVT).
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
}
}
break;
case 0x4: // VST1/VLD1
{
u32 size = (inst >> 6) & 0x3;
bool load = (inst >> 21) & 1;
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (size == 2) // 32bit
{
if (load)
{
// For 32bit loads we are loading to a temporary
// So we need to read PC+8,PC+12 to get the two destination registers
u32 vcvt_1 = *(u32*)(ptr + 8);
u32 vcvt_2 = *(u32*)(ptr + 12);
u32 dest_register_1 = (vcvt_1 >> 12) & 0xF;
dest_register_1 |= (vcvt_1 >> 18) & 0x10;
u32 dest_register_2 = (vcvt_2 >> 12) & 0xF;
dest_register_2 |= (vcvt_2 >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register_1 + D0);
*V1 = (ARMReg)(dest_register_2 + D0);
}
else
{
// For 32bit stores we are storing from a temporary
// So we need to check the VCVT at PC-8 for the source register
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
else if (size == 3) // 64bit
{
if (load)
{
// For 64bit loads we load directly in to the VFP register
u32 dest_register = (inst >> 12) & 0xF;
dest_register |= (inst >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register + D0);
}
else
{
// For 64bit stores we are storing from a temporary
// Check the previous VREV64 instruction for the real register
u32 src_register = prev_inst & 0xF;
src_register |= (prev_inst >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F64;
}
// Any other size value leaves the size flags unset.
}
break;
default:
// Not a load/store this routine knows about.
printf("Op is 0x%02x\n", op);
return false;
break;
}
}
}
return true;
}
// Fault handler entry point. An access below Memory::physical_base is
// reported through PanicAlertT; in every case the fault is then handed to
// BackPatch(), whose result is returned.
bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx)
{
	const bool below_memory_space = access_address < (uintptr_t)Memory::physical_base;
	if (below_memory_space)
	{
		PanicAlertT("Exception handler - access below memory space. 0x%08x", access_address);
	}

	return BackPatch(ctx);
}
// Rewrites the fastmem access that faulted at ctx->CTX_PC into its slowmem
// equivalent and points ctx->CTX_PC at the start of the rewritten routine.
//
// Returns true when the access was patched. Returns false, leaving the code
// and ctx untouched, when the faulting instruction cannot be decoded or when
// no backpatch layout is registered for the decoded flags. HandleFault()
// passes this result straight on to its caller.
bool JitArm::BackPatch(SContext* ctx)
{
	// TODO: This ctx needs to be filled with our information

	// We need to get the destination register before we start
	u8* codePtr = (u8*)ctx->CTX_PC;
	u32 Value = *(u32*)codePtr;
	ARMReg rD = INVALID_REG;
	ARMReg V1 = INVALID_REG;
	u32 flags = 0;

	if (!DisasmLoadStore(codePtr, &flags, &rD, &V1))
	{
		// Report the failure through the return value. This used to call
		// exit(0), which ended the process with a success status on an
		// unrecoverable fault.
		WARN_LOG(DYNA_REC, "Invalid backpatch at location 0x%08lx(0x%08x)", ctx->CTX_PC, Value);
		return false;
	}

	// Look the layout up with find(): operator[] would insert an empty
	// BackPatchInfo for an unknown flag combination and the code below would
	// then patch with a bogus offset and size.
	auto info_it = m_backpatch_info.find(flags);
	if (info_it == m_backpatch_info.end())
	{
		WARN_LOG(DYNA_REC, "No backpatch info for flags 0x%08x at location 0x%08lx", flags, ctx->CTX_PC);
		return false;
	}
	const BackPatchInfo& info = info_it->second;

	// The routine starts m_fastmem_trouble_inst_offset instructions (4 bytes
	// each) before the instruction that faulted.
	ARMXEmitter emitter(codePtr - info.m_fastmem_trouble_inst_offset * 4);
	u32 new_pc = (u32)emitter.GetCodePtr();
	EmitBackpatchRoutine(&emitter, flags, false, true, rD, V1);
	emitter.FlushIcache();

	// Resume at the beginning of the freshly written slowmem routine.
	ctx->CTX_PC = new_pc;
	return true;
}
// Emits the load/store routine selected by `flags` through `emit`.
//   emit       - emitter to write the routine with.
//   flags      - BackPatchInfo flags: load or store, access size, and the
//                REVERSE / EXTEND modifiers.
//   fastmem    - true emits the direct memory access; false emits a call to the
//                matching PowerPC::Read_* / PowerPC::Write_* function.
//   do_padding - true pads the routine with NOPs up to the larger of the
//                fastmem and slowmem sizes recorded in m_backpatch_info[flags].
//   RS         - data register (source of a store, destination of a load).
//   V1         - second destination register, used by 32-bit float loads.
// The guest address is expected in R12; R11 (and R10 on the fastmem path) are
// used as scratch registers.
// Returns the index, in instructions from the start of the routine, of the
// instruction that performs the memory access on the fastmem path, and 0 on
// the slowmem path.
u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARMReg RS, ARMReg V1)
{
ARMReg addr = R12;
ARMReg temp = R11;
u32 trouble_offset = 0;
const u8* code_base = emit->GetCodePtr();
if (fastmem)
{
ARMReg temp2 = R10;
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
// temp = addr with the mask bits cleared.
// NOTE(review): R8 is added to / used as the base for every access below, so it
// presumably holds the host base address of emulated memory - confirm.
emit->BIC(temp, addr, mask);
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
// Floating point store: convert/byte swap into S0/D0, then store.
emit->ADD(temp, temp, R8);
NEONXEmitter nemit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
emit->VCVT(S0, RS, 0);
nemit.VREV32(I_8, D0, D0);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
emit->VSTR(S0, temp, 0);
}
else
{
nemit.VREV64(I_8, D0, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
nemit.VST1(I_64, D0, temp);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
// Floating point load: the load is the first instruction after the ADD.
emit->ADD(temp, temp, R8);
NEONXEmitter nemit(emit);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
nemit.VLD1(F_32, D0, temp);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
// The loaded single is converted into both destination registers.
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
nemit.VLD1(I_64, RS, temp);
nemit.VREV64(I_8, RS, RS); // Byte swap to result
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
// Integer store: 16/32-bit values are byte swapped into temp2 first; bytes
// are stored straight from RS.
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(temp2, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(temp2, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->STR(temp2, R8, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->STRH(temp2, R8, temp);
else
emit->STRB(RS, R8, temp);
}
else
{
// Integer load: the load itself is the trouble instruction.
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->LDR(RS, R8, temp); // 5
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->LDRH(RS, R8, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->LDRB(RS, R8, temp);
// Without FLAG_REVERSE the loaded value is byte swapped after the load.
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, RS); // 6
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, RS);
}
if (flags & BackPatchInfo::FLAG_EXTEND)
emit->SXTH(RS, RS);
}
}
else
{
// Slowmem path: every variant saves R0-R3, calls the matching PowerPC
// accessor through `temp` and restores R0-R3 afterwards.
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
// Write_U32(value in R0, address in R1) with the converted single's bits.
emit->MOV(R1, addr);
emit->VCVT(S0, RS, 0);
emit->VMOV(R0, S0);
emit->MOVI2R(temp, (u32)&PowerPC::Write_U32);
emit->BL(temp);
}
else
{
emit->MOVI2R(temp, (u32)&PowerPC::Write_F64);
// The double goes in core registers (address in R2) without the VFP calling
// convention, and in D0 (address in R0) with it.
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(R0, RS);
emit->MOV(R2, addr);
#else
emit->VMOV(D0, RS);
emit->MOV(R0, addr);
#endif
emit->BL(temp);
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
// Read_U32 result is moved into S0 and converted into both destinations.
emit->MOVI2R(temp, (u32)&PowerPC::Read_U32);
emit->BL(temp);
emit->VMOV(S0, R0);
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
emit->MOVI2R(temp, (u32)&PowerPC::Read_F64);
emit->BL(temp);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(RS, R0);
#else
emit->VMOV(RS, D0);
#endif
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
// Integer store: value in R0, address in R1.
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, RS);
emit->MOV(R1, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&PowerPC::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&PowerPC::Write_U16);
else
emit->MOVI2R(temp, (u32)&PowerPC::Write_U8);
emit->BL(temp);
emit->POP(4, R0, R1, R2, R3);
}
else
{
// Integer load: address in R0; the result is parked in `temp` so it survives
// the POP that restores R0.
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&PowerPC::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&PowerPC::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->MOVI2R(temp, (u32)&PowerPC::Read_U8);
emit->BL(temp);
emit->MOV(temp, R0);
emit->POP(4, R0, R1, R2, R3);
// Here the swap is applied when FLAG_REVERSE IS set, the opposite of the
// fastmem path, which swaps when it is not set.
// NOTE(review): unlike the fastmem load path, no SXTH is emitted for
// FLAG_EXTEND here - confirm whether that is intended.
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
emit->MOV(RS, temp);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, temp); // 6
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, temp);
}
}
}
if (do_padding)
{
// Pad with NOPs so the fastmem and slowmem variants occupy the same number of
// instructions.
// NOTE(review): operator[] inserts an empty entry for unknown flags, and the
// u32 subtraction below would wrap if code_size exceeded num_insts_max -
// confirm both cannot happen for the flag sets registered by InitBackpatch().
BackPatchInfo& info = m_backpatch_info[flags];
u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
u32 code_size = emit->GetCodePtr() - code_base;
code_size /= 4;
emit->NOP(num_insts_max - code_size);
}
return trouble_offset;
}
void JitArm::InitBackpatch()
{
u32 flags = 0;
BackPatchInfo info;
u8* code_base = GetWritableCodePtr();
u8* code_end;
// Writes
{
// 8bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
// Loads
{
// 8bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - sign extend
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_EXTEND;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
}

View File

@ -1,309 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
// The branches are known good, or at least reasonably good.
// No need for a disable-mechanism.
using namespace ArmGen;
// Emits code for `sc`: flushes both register caches, stores the address of
// the following instruction as the PC, sets EXCEPTION_SYSCALL in the pending
// exceptions word and leaves the block through the exception exit.
void JitArm::sc(UGeckoInstruction inst)
{
	INSTRUCTION_START

	gpr.Flush();
	fpr.Flush();

	ARMReg scratch = gpr.GetReg();

	// pc = compilerPC + 4
	MOVI2R(scratch, js.compilerPC + 4);
	STR(scratch, R9, PPCSTATE_OFF(pc));

	// Exceptions |= EXCEPTION_SYSCALL
	LDR(scratch, R9, PPCSTATE_OFF(Exceptions));
	ORR(scratch, scratch, EXCEPTION_SYSCALL);
	STR(scratch, R9, PPCSTATE_OFF(Exceptions));

	gpr.Unlock(scratch);

	WriteExceptionExit();
}
// Emits code for `rfi`. Mirrors the interpreter's rfi:
//   MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13;
//   NPC = SRR0;
void JitArm::rfi(UGeckoInstruction inst)
{
	INSTRUCTION_START

	gpr.Flush();
	fpr.Flush();

	const u32 mask = 0x87C0FFFF;
	const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]

	ARMReg target = gpr.GetReg();
	ARMReg work = gpr.GetReg();
	ARMReg srr1_mask = gpr.GetReg();
	ARMReg msr_bits = gpr.GetReg();

	// Both masks already have MSR[13] folded out
	MOVI2R(work, (~mask) & clearMSR13);
	MOVI2R(srr1_mask, mask & clearMSR13);

	// msr_bits = MSR & ~mask & clearMSR13
	LDR(msr_bits, R9, PPCSTATE_OFF(msr));
	AND(msr_bits, msr_bits, work);

	// work = SRR1 & mask & clearMSR13
	LDR(work, R9, PPCSTATE_OFF(spr[SPR_SRR1]));
	AND(work, work, srr1_mask);

	// MSR = msr_bits | work
	ORR(work, msr_bits, work);
	STR(work, R9, PPCSTATE_OFF(msr));

	// Exit destination is SRR0
	LDR(target, R9, PPCSTATE_OFF(spr[SPR_SRR0]));

	gpr.Unlock(work, srr1_mask, msr_bits);
	WriteRfiExitDestInR(target); // target gets unlocked here
}
// Emits code for the unconditional branch `b`/`ba`/`bl`/`bla`.
void JitArm::bx(UGeckoInstruction inst)
{
	INSTRUCTION_START

	// The link register update has to be emitted in every case, even when
	// PPCAnalyst::Flatten() merged the blocks.
	if (inst.LK)
	{
		ARMReg link = gpr.GetReg(false);
		u32 return_address = js.compilerPC + 4;
		MOVI2R(link, return_address);
		STR(link, R9, PPCSTATE_OFF(spr[SPR_LR]));
	}

	// When this is not the last instruction of the block, Flatten() merged
	// the branch target into this block and no exit is needed.
	if (!js.isLastInstruction)
		return;

	gpr.Flush();
	fpr.Flush();

	const u32 displacement = SignExt26(inst.LI << 2);
	const u32 destination = inst.AA ? displacement : js.compilerPC + displacement;

	if (destination == js.compilerPC)
	{
		// Branch to self: call CoreTiming::Idle so idle loops go faster,
		// then store this instruction's address as the PC and take the
		// exception exit.
		MOVI2R(R14, (u32)&CoreTiming::Idle);
		BL(R14);
		MOVI2R(R14, js.compilerPC);
		STR(R14, R9, PPCSTATE_OFF(pc));
		WriteExceptionExit();
	}

	WriteExit(destination);
}
// Emits code for the conditional branch `bc` (optionally decrementing and
// testing CTR, optionally testing a CR bit, optionally linking).
void JitArm::bcx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	// USES_CR

	const bool uses_ctr = (inst.BO & BO_DONT_DECREMENT_FLAG) == 0;
	const bool uses_cr = (inst.BO & BO_DONT_CHECK_CONDITION) == 0;

	ARMReg spare = gpr.GetReg();
	ARMReg scratch = gpr.GetReg();

	FixupBranch pCTRDontBranch;
	if (uses_ctr) // Decrement and test CTR
	{
		LDR(scratch, R9, PPCSTATE_OFF(spr[SPR_CTR]));
		SUBS(scratch, scratch, 1);
		STR(scratch, R9, PPCSTATE_OFF(spr[SPR_CTR]));

		// Skip the branch when the CTR condition is not met
		if (inst.BO & BO_BRANCH_IF_CTR_0)
			pCTRDontBranch = B_CC(CC_NEQ);
		else
			pCTRDontBranch = B_CC(CC_EQ);
	}

	FixupBranch pConditionDontBranch;
	if (uses_cr) // Test a CR bit
	{
		pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
		                                        !(inst.BO_2 & BO_BRANCH_IF_TRUE));
	}

	if (inst.LK)
	{
		u32 return_address = js.compilerPC + 4;
		MOVI2R(scratch, return_address);
		STR(scratch, R9, PPCSTATE_OFF(spr[SPR_LR]));
	}

	gpr.Unlock(spare, scratch);

	u32 destination = SignExt16(inst.BD << 2);
	if (!inst.AA)
		destination += js.compilerPC;

	// Taken path: write back registers without dropping the cache state,
	// since the not-taken path continues after this point.
	gpr.Flush(FLUSH_MAINTAIN_STATE);
	fpr.Flush(FLUSH_MAINTAIN_STATE);
	WriteExit(destination);

	// Not-taken path lands here
	if (uses_cr)
		SetJumpTarget(pConditionDontBranch);
	if (uses_ctr)
		SetJumpTarget(pCTRDontBranch);

	if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
	{
		gpr.Flush();
		fpr.Flush();
		WriteExit(js.compilerPC + 4);
	}
}
// Emits code for `bcctr`: branch to CTR & ~3, either unconditionally or
// depending on a CR bit. bcctr never decrements or tests CTR.
void JitArm::bcctrx(UGeckoInstruction inst)
{
	INSTRUCTION_START

	_dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!");

	if (inst.BO_2 & BO_DONT_CHECK_CONDITION)
	{
		// BO_2 == 1z1zz -> b always
		// NPC = CTR & 0xfffffffc;
		gpr.Flush();
		fpr.Flush();

		ARMReg target = gpr.GetReg();

		if (inst.LK_3)
		{
			u32 return_address = js.compilerPC + 4;
			MOVI2R(target, return_address);
			STR(target, R9, PPCSTATE_OFF(spr[SPR_LR]));
		}

		LDR(target, R9, PPCSTATE_OFF(spr[SPR_CTR]));
		BIC(target, target, 0x3);
		WriteExitDestInR(target);
	}
	else
	{
		// Rare condition seen in (just some versions of?) Nintendo's NES Emulator
		// BO_2 == 001zy -> b if false
		// BO_2 == 011zy -> b if true
		ARMReg target = gpr.GetReg();
		ARMReg scratch = gpr.GetReg();

		FixupBranch not_taken = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
		                                         !(inst.BO_2 & BO_BRANCH_IF_TRUE));

		LDR(target, R9, PPCSTATE_OFF(spr[SPR_CTR]));
		BIC(target, target, 0x3);

		if (inst.LK_3)
		{
			u32 return_address = js.compilerPC + 4;
			MOVI2R(scratch, return_address);
			STR(scratch, R9, PPCSTATE_OFF(spr[SPR_LR]));
		}

		gpr.Unlock(scratch); // target gets unlocked in WriteExitDestInR

		gpr.Flush(FLUSH_MAINTAIN_STATE);
		fpr.Flush(FLUSH_MAINTAIN_STATE);
		WriteExitDestInR(target);

		SetJumpTarget(not_taken);

		if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
		{
			gpr.Flush();
			fpr.Flush();
			WriteExit(js.compilerPC + 4);
		}
	}
}
// Emits code for `bclr`: branch to LR & ~3, optionally decrementing and
// testing CTR, optionally testing a CR bit, optionally linking.
void JitArm::bclrx(UGeckoInstruction inst)
{
	INSTRUCTION_START

	const bool uses_ctr = (inst.BO & BO_DONT_DECREMENT_FLAG) == 0;
	const bool uses_cr = (inst.BO & BO_DONT_CHECK_CONDITION) == 0;

	ARMReg target = gpr.GetReg();
	ARMReg scratch = gpr.GetReg();

	FixupBranch pCTRDontBranch;
	if (uses_ctr) // Decrement and test CTR
	{
		LDR(scratch, R9, PPCSTATE_OFF(spr[SPR_CTR]));
		SUBS(scratch, scratch, 1);
		STR(scratch, R9, PPCSTATE_OFF(spr[SPR_CTR]));

		// Skip the branch when the CTR condition is not met
		if (inst.BO & BO_BRANCH_IF_CTR_0)
			pCTRDontBranch = B_CC(CC_NEQ);
		else
			pCTRDontBranch = B_CC(CC_EQ);
	}

	FixupBranch pConditionDontBranch;
	if (uses_cr) // Test a CR bit
	{
		pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
		                                        !(inst.BO_2 & BO_BRANCH_IF_TRUE));
	}

	// target = LR & 0xFFFFFFFC, read before LR is possibly overwritten below
	LDR(target, R9, PPCSTATE_OFF(spr[SPR_LR]));
	BIC(target, target, 0x3);

	if (inst.LK)
	{
		u32 return_address = js.compilerPC + 4;
		MOVI2R(scratch, return_address);
		STR(scratch, R9, PPCSTATE_OFF(spr[SPR_LR]));
	}

	gpr.Unlock(scratch); // target gets unlocked in WriteExitDestInR

	gpr.Flush(FLUSH_MAINTAIN_STATE);
	fpr.Flush(FLUSH_MAINTAIN_STATE);
	WriteExitDestInR(target);

	// Not-taken path lands here
	if (uses_cr)
		SetJumpTarget(pConditionDontBranch);
	if (uses_ctr)
		SetJumpTarget(pCTRDontBranch);

	if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
	{
		gpr.Flush();
		fpr.Flush();
		WriteExit(js.compilerPC + 4);
	}
}

View File

@ -1,69 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
static const double minmaxFloat[2] = {-(double)0x80000000, (double)0x7FFFFFFF};
static const double doublenum = 0xfff8000000000000ull;
// Exception masks
static ArmGen::Operand2 FRFIMask(5, 0x8); // 0x60000
static ArmGen::Operand2 FIMask(2, 8); // 0x20000
static ArmGen::Operand2 FRMask(4, 8); // 0x40000
static ArmGen::Operand2 FXMask(2, 1); // 0x80000000
static ArmGen::Operand2 VEMask(0x40, 0); // 0x40
static ArmGen::Operand2 XXException(2, 4); // 0x2000000
static ArmGen::Operand2 CVIException(1, 0xC); // 0x100
static ArmGen::Operand2 NANException(1, 4); // 0x1000000
static ArmGen::Operand2 VXVCException(8, 8); // 0x80000
static ArmGen::Operand2 ZXException(1, 3); // 0x4000000
static ArmGen::Operand2 VXSQRTException(2, 5); // 0x200
// Emits code that sets the FPSCR exception bit selected by `Exception` in the
// register `Reg` (which holds an FPSCR value), sets the FX summary bit when
// that changed the register, and finally clears the FR/FI bits.
// Unsupported exception values assert and emit nothing.
inline void JitArm::SetFPException(ArmGen::ARMReg Reg, u32 Exception)
{
	ArmGen::Operand2* mask = nullptr;
	switch (Exception)
	{
	case FPSCR_VXCVI:  mask = &CVIException;    break;
	case FPSCR_XX:     mask = &XXException;     break;
	case FPSCR_VXSNAN: mask = &NANException;    break;
	case FPSCR_VXVC:   mask = &VXVCException;   break;
	case FPSCR_ZX:     mask = &ZXException;     break;
	case FPSCR_VXSQRT: mask = &VXSQRTException; break;
	default:
		_assert_msg_(DYNA_REC, false, "Passed unsupported FPexception: 0x%08x", Exception);
		return;
	}

	ArmGen::ARMReg previous = gpr.GetReg();

	// Remember the old value so a newly set bit can be detected
	MOV(previous, Reg);
	ORR(Reg, Reg, *mask);
	CMP(previous, Reg);

	// Only executed when the value changed
	SetCC(CC_NEQ);
	ORR(Reg, Reg, FXMask); // If exception is set, set exception bit
	SetCC();

	BIC(Reg, Reg, FRFIMask);

	gpr.Unlock(previous);
}

View File

@ -1,536 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_FPUtils.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Intentionally empty: emits nothing. Callers in this file invoke it when an
// instruction has Rc set, so CR1 is currently not updated by those paths.
void JitArm::Helper_UpdateCR1(ARMReg fpscr, ARMReg temp)
{
}
// Emits code for fctiw: converts FB to a signed 32-bit integer using the
// rounding mode taken from the PowerPC FPSCR, clamping out-of-range inputs and
// updating the FPSCR exception bits.
// NOTE(review): FALLBACK_IF(true) presumably sends every call to the
// interpreter, which would leave everything below it unused - confirm against
// the macro definition.
void JitArm::fctiwx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(true);
u32 b = inst.FB;
u32 d = inst.FD;
ARMReg vB = fpr.R0(b);
ARMReg vD = fpr.R0(d);
ARMReg V0 = fpr.GetReg();
ARMReg V1 = fpr.GetReg();
ARMReg V2 = fpr.GetReg();
ARMReg rA = gpr.GetReg();
ARMReg fpscrReg = gpr.GetReg();
FixupBranch DoneMax, DoneMin;
// fpscrReg = PowerPC FPSCR, rA = address of the {min, max} clamp table
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
MOVI2R(rA, (u32)minmaxFloat);
// Check if greater than max float
{
VLDR(V0, rA, 8); // Load Max
VCMPE(vB, V0);
VMRS(_PC); // Loads in to APSR
FixupBranch noException = B_CC(CC_LE);
VMOV(vD, V0); // Set to max
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMax = B();
SetJumpTarget(noException);
}
// Check if less than min float
{
VLDR(V0, rA, 0);
VCMPE(vB, V0);
VMRS(_PC);
FixupBranch noException = B_CC(CC_GE);
VMOV(vD, V0);
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMin = B();
SetJumpTarget(noException);
}
// Within ranges, convert to integer
// Set rounding mode first
// PPC <-> ARM rounding modes
// 0, 1, 2, 3 <-> 0, 3, 1, 2
ARMReg rB = gpr.GetReg();
// rA = host FPSCR
VMRS(rA);
// Bits 22-23
BIC(rA, rA, Operand2(3, 5));
LDR(rB, R9, PPCSTATE_OFF(fpscr));
AND(rB, rB, 0x3); // Get the FPSCR rounding bits
// Conditionally executed chain: each ORR only runs for the matching PPC mode
CMP(rB, 1);
SetCC(CC_EQ); // zero
ORR(rA, rA, Operand2(3, 5));
SetCC(CC_NEQ);
CMP(rB, 2); // +inf
SetCC(CC_EQ);
ORR(rA, rA, Operand2(1, 5));
SetCC(CC_NEQ);
CMP(rB, 3); // -inf
SetCC(CC_EQ);
ORR(rA, rA, Operand2(2, 5));
SetCC();
VMSR(rA);
// NOTE(review): the value written back after the conversion has both
// rounding bits set rather than the bits read before - confirm intent.
ORR(rA, rA, Operand2(3, 5));
VCVT(vD, vB, TO_INT | IS_SIGNED);
VMSR(rA);
gpr.Unlock(rB);
// Exact result: clear FR/FI and skip the inexact handling
VCMPE(vD, vB);
VMRS(_PC);
SetCC(CC_EQ);
BIC(fpscrReg, fpscrReg, FRFIMask);
FixupBranch DoneEqual = B();
SetCC();
// Inexact result: raise XX, set FI, and set FR when |result| > |input|
SetFPException(fpscrReg, FPSCR_XX);
ORR(fpscrReg, fpscrReg, FIMask);
VABS(V1, vB);
VABS(V2, vD);
VCMPE(V2, V1);
VMRS(_PC);
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, FRMask);
SetCC();
SetJumpTarget(DoneEqual);
SetJumpTarget(DoneMax);
SetJumpTarget(DoneMin);
// OR the 64-bit constant stored at &doublenum into the result
MOVI2R(rA, (u32)&doublenum);
VLDR(V0, rA, 0);
NEONXEmitter nemit(this);
nemit.VORR(vD, vD, V0);
if (inst.Rc)
Helper_UpdateCR1(fpscrReg, rA);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(rA);
gpr.Unlock(fpscrReg);
fpr.Unlock(V0);
fpr.Unlock(V1);
fpr.Unlock(V2);
}
// Emits code for fctiwz: converts FB to a signed 32-bit integer rounding
// toward zero, clamping out-of-range inputs and updating the FPSCR exception
// bits.
// NOTE(review): FALLBACK_IF(true) presumably sends every call to the
// interpreter, which would leave everything below it unused - confirm against
// the macro definition.
void JitArm::fctiwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(true);
u32 b = inst.FB;
u32 d = inst.FD;
ARMReg vB = fpr.R0(b);
ARMReg vD = fpr.R0(d);
ARMReg V0 = fpr.GetReg();
ARMReg V1 = fpr.GetReg();
ARMReg V2 = fpr.GetReg();
ARMReg rA = gpr.GetReg();
ARMReg fpscrReg = gpr.GetReg();
FixupBranch DoneMax, DoneMin;
// fpscrReg = PowerPC FPSCR, rA = address of the {min, max} clamp table
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
MOVI2R(rA, (u32)minmaxFloat);
// Check if greater than max float
{
VLDR(V0, rA, 8); // Load Max
VCMPE(vB, V0);
VMRS(_PC); // Loads in to APSR
FixupBranch noException = B_CC(CC_LE);
VMOV(vD, V0); // Set to max
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMax = B();
SetJumpTarget(noException);
}
// Check if less than min float
{
VLDR(V0, rA, 0);
VCMPE(vB, V0);
VMRS(_PC);
FixupBranch noException = B_CC(CC_GE);
VMOV(vD, V0);
SetFPException(fpscrReg, FPSCR_VXCVI);
DoneMin = B();
SetJumpTarget(noException);
}
// Within ranges, convert to integer
VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
// Exact result: clear FR/FI and skip the inexact handling
VCMPE(vD, vB);
VMRS(_PC);
SetCC(CC_EQ);
BIC(fpscrReg, fpscrReg, FRFIMask);
FixupBranch DoneEqual = B();
SetCC();
// Inexact result: raise XX, set FI, and set FR when |result| > |input|
SetFPException(fpscrReg, FPSCR_XX);
ORR(fpscrReg, fpscrReg, FIMask);
VABS(V1, vB);
VABS(V2, vD);
VCMPE(V2, V1);
VMRS(_PC);
SetCC(CC_GT);
ORR(fpscrReg, fpscrReg, FRMask);
SetCC();
SetJumpTarget(DoneEqual);
SetJumpTarget(DoneMax);
SetJumpTarget(DoneMin);
// OR the 64-bit constant stored at &doublenum into the result
MOVI2R(rA, (u32)&doublenum);
VLDR(V0, rA, 0);
NEONXEmitter nemit(this);
nemit.VORR(vD, vD, V0);
if (inst.Rc)
Helper_UpdateCR1(fpscrReg, rA);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(rA);
gpr.Unlock(fpscrReg);
fpr.Unlock(V0);
fpr.Unlock(V1);
fpr.Unlock(V2);
}
// fabs: FD = |FB|. Falls back when the Rc bit is set.
void JitArm::fabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg src = fpr.R0(inst.FB);
	ARMReg dst = fpr.R0(inst.FD, false);

	VABS(dst, src);
}
// fnabs: FD = -|FB|. Falls back when the Rc bit is set.
void JitArm::fnabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg src = fpr.R0(inst.FB);
	ARMReg dst = fpr.R0(inst.FD, false);

	// Absolute value first, then negate in place
	VABS(dst, src);
	VNEG(dst, dst);
}
// fneg: FD = -FB. Falls back when the Rc bit is set.
void JitArm::fnegx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg src = fpr.R0(inst.FB);
	ARMReg dst = fpr.R0(inst.FD, false);

	VNEG(dst, src);
}
// fadds: FD = FA + FB; the sum is written to both the R0 and the R1 register
// of FD. Falls back when the Rc bit is set.
void JitArm::faddsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg lhs = fpr.R0(inst.FA);
	ARMReg rhs = fpr.R0(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	VADD(dst0, lhs, rhs);
	VMOV(dst1, dst0); // duplicate the result into the second register
}
// fadd: FD = FA + FB. Falls back when the Rc bit is set.
void JitArm::faddx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg lhs = fpr.R0(inst.FA);
	ARMReg rhs = fpr.R0(inst.FB);
	ARMReg dst = fpr.R0(inst.FD, false);

	VADD(dst, lhs, rhs);
}
// fsubs: FD = FA - FB; the difference is written to both the R0 and the R1
// register of FD. Falls back when the Rc bit is set.
void JitArm::fsubsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg lhs = fpr.R0(inst.FA);
	ARMReg rhs = fpr.R0(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	VSUB(dst0, lhs, rhs);
	VMOV(dst1, dst0); // duplicate the result into the second register
}
// fsub: FD = FA - FB. Falls back when the Rc bit is set.
void JitArm::fsubx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg lhs = fpr.R0(inst.FA);
	ARMReg rhs = fpr.R0(inst.FB);
	ARMReg dst = fpr.R0(inst.FD, false);

	VSUB(dst, lhs, rhs);
}
// fmuls: FD = FA * FC; the product is written to both the R0 and the R1
// register of FD. Falls back when the Rc bit is set.
void JitArm::fmulsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg lhs = fpr.R0(inst.FA);
	ARMReg rhs = fpr.R0(inst.FC);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	VMUL(dst0, lhs, rhs);
	VMOV(dst1, dst0); // duplicate the result into the second register
}
// fmul: FD = FA * FC. Falls back when the Rc bit is set.
void JitArm::fmulx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg lhs = fpr.R0(inst.FA);
	ARMReg rhs = fpr.R0(inst.FC);
	ARMReg dst = fpr.R0(inst.FD, false);

	VMUL(dst, lhs, rhs);
}
// fmr: FD = FB. Falls back when the Rc bit is set.
void JitArm::fmrx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg src = fpr.R0(inst.FB);
	ARMReg dst = fpr.R0(inst.FD, false);

	VMOV(dst, src);
}
// fmadds: FD = FA * FC + FB; the result is written to both the R0 and the R1
// register of FD. Falls back when the Rc bit is set.
void JitArm::fmaddsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg mul_a = fpr.R0(inst.FA);
	ARMReg addend = fpr.R0(inst.FB);
	ARMReg mul_c = fpr.R0(inst.FC);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	// Accumulate in a temporary so the sources stay untouched
	ARMReg acc = fpr.GetReg();
	VMOV(acc, addend);
	VMLA(acc, mul_a, mul_c);

	VMOV(dst0, acc);
	VMOV(dst1, acc);

	fpr.Unlock(acc);
}
// fmadd: FD = FA * FC + FB. Falls back when the Rc bit is set.
void JitArm::fmaddx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg mul_a = fpr.R0(inst.FA);
	ARMReg addend = fpr.R0(inst.FB);
	ARMReg mul_c = fpr.R0(inst.FC);
	ARMReg dst = fpr.R0(inst.FD, false);

	// Accumulate in a temporary so the sources stay untouched
	ARMReg acc = fpr.GetReg();
	VMOV(acc, addend);
	VMLA(acc, mul_a, mul_c);

	VMOV(dst, acc);

	fpr.Unlock(acc);
}
// fnmadd: FD = -(FA * FC + FB). Falls back when the Rc bit is set.
void JitArm::fnmaddx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg mul_a = fpr.R0(inst.FA);
	ARMReg addend = fpr.R0(inst.FB);
	ARMReg mul_c = fpr.R0(inst.FC);
	ARMReg dst = fpr.R0(inst.FD, false);

	// Accumulate in a temporary so the sources stay untouched
	ARMReg acc = fpr.GetReg();
	VMOV(acc, addend);
	VMLA(acc, mul_a, mul_c);

	VNEG(dst, acc);

	fpr.Unlock(acc);
}
// fnmadds: FD = -(FA * FC + FB); the result is written to both the R0 and the
// R1 register of FD. Falls back when the Rc bit is set.
void JitArm::fnmaddsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	ARMReg mul_a = fpr.R0(inst.FA);
	ARMReg addend = fpr.R0(inst.FB);
	ARMReg mul_c = fpr.R0(inst.FC);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	// Accumulate in a temporary so the sources stay untouched
	ARMReg acc = fpr.GetReg();
	VMOV(acc, addend);
	VMLA(acc, mul_a, mul_c);

	VNEG(dst0, acc);
	VNEG(dst1, acc);

	fpr.Unlock(acc);
}
// fres: FD = 1.0 / FB, written to both the R0 and the R1 register of FD.
// XXX: Messes up Super Mario Sunshine title screen, so the emitter is
// currently forced to fall back (see the FIXME below).
void JitArm::fresx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	// FIXME
	FALLBACK_IF(true);

	ARMReg divisor = fpr.R0(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	ARMReg one = fpr.GetReg();
	MOVI2R(one, 1.0, INVALID_REG); // temp reg isn't needed for 1.0

	VDIV(dst1, one, divisor);
	VDIV(dst0, one, divisor);

	fpr.Unlock(one);
}
// fsel: FD = (FA >= 0.0) ? FC : FB. Falls back when the Rc bit is set.
//
// This is a scalar floating-point instruction, so it is gated by
// bJITFloatingPointOff like every other scalar FP emitter in this file
// (it previously checked bJITPairedOff).
void JitArm::fselx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	ARMReg vA0 = fpr.R0(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vD0 = fpr.R0(d, false);

	// Compare FA against zero and move the FP flags into the APSR
	VCMP(vA0);
	VMRS(_PC);

	FixupBranch GT0 = B_CC(CC_GE);
	// FA < 0 (or unordered): select FB
	VMOV(vD0, vB0);
	FixupBranch EQ0 = B();

	SetJumpTarget(GT0);
	// FA >= 0: select FC
	VMOV(vD0, vC0);

	SetJumpTarget(EQ0);
}
// Emits code for frsqrte (reciprocal square root estimate of FB into FD).
// NOTE(review): FALLBACK_IF(true) presumably sends every call to the
// interpreter, which would leave everything below it unused - confirm against
// the macro definition.
// NOTE(review): this is a scalar FP instruction but it is gated by
// bJITPairedOff; the other scalar FP emitters use bJITFloatingPointOff.
void JitArm::frsqrtex(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(true);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
ARMReg vB0 = fpr.R0(b);
ARMReg vD0 = fpr.R0(d, false);
ARMReg fpscrReg = gpr.GetReg();
// D1 is used directly instead of a register obtained from fpr.GetReg()
ARMReg V0 = D1;
// rA is allocated and unlocked but never otherwise used
ARMReg rA = gpr.GetReg();
// V0 = value stored at PPC_NAN (fpscrReg doubles as the address temp)
MOVI2R(fpscrReg, (u32)&PPC_NAN);
VLDR(V0, fpscrReg, 0);
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
// Compare FB against zero
VCMP(vB0);
VMRS(_PC);
// NOTE(review): as written the branch is taken when FB < 0, so the
// fall-through (FB >= 0) path is the one that writes NaN and raises VXSQRT;
// the label name suggests the opposite was intended - verify.
FixupBranch Less0 = B_CC(CC_LT);
VMOV(vD0, V0);
SetFPException(fpscrReg, FPSCR_VXSQRT);
FixupBranch SkipOrr0 = B();
SetJumpTarget(Less0);
// NOTE(review): ZX is raised when the compare result is not "equal" -
// verify the intended condition.
FixupBranch noException = B_CC(CC_EQ);
SetFPException(fpscrReg, FPSCR_ZX);
SetJumpTarget(noException);
SetJumpTarget(SkipOrr0);
// Convert to S0, estimate with NEON on D0, convert back into FD
VCVT(S0, vB0, 0);
NEONXEmitter nemit(this);
nemit.VRSQRTE(F_32, D0, D0);
VCVT(vD0, S0, 0);
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, rA);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,547 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Emits a store of guest register `value` to guest memory.
//
// dest       - guest GPR index used as address base, or -1 for none
// value      - guest GPR index holding the data to store
// regOffset  - guest GPR index added to the base, or -1 to use `offset`
// accessSize - 32, 16, or anything else for an 8-bit store
// offset     - immediate displacement, only used when regOffset == -1
//
// The effective address is either folded into a compile-time constant
// (is_immediate / imm_addr) or computed at runtime into R12.
// NOTE(review): R12 is not passed to EmitBackpatchRoutine explicitly, so that
// routine presumably reads the address from R12 - confirm.
void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset)
{
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (regOffset == -1)
{
// base (+ immediate displacement)
if (dest != -1)
{
if (gpr.IsImm(dest))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + offset;
}
else
{
// Use the displacement as an ARM immediate operand when it is encodable
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(dest));
}
}
}
else
{
// No base register: the displacement is the absolute address
is_immediate = true;
imm_addr = offset;
}
}
else
{
// base + index register; `offset` is ignored in this form
if (dest != -1)
{
if (gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset);
}
else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(dest), off))
{
ADD(rA, gpr.R(regOffset), off);
}
else
{
MOVI2R(rA, gpr.GetImm(dest));
ADD(rA, rA, gpr.R(regOffset));
}
}
else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(regOffset), off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, gpr.GetImm(regOffset));
ADD(rA, rA, gpr.R(dest));
}
}
else
{
ADD(rA, gpr.R(dest), gpr.R(regOffset));
}
}
else
{
// Index register only
if (gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(regOffset);
}
else
{
MOV(rA, gpr.R(regOffset));
}
}
}
ARMReg RS = gpr.R(value);
// Select the backpatch variant from the access size
u32 flags = BackPatchInfo::FLAG_STORE;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (is_immediate)
{
if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
{
// Gather pipe write: append the byte-swapped value directly to
// GPFifo::m_gatherPipe and advance m_gatherPipeCount.
// Uses R14, R10 and R11 as fixed temporaries.
MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
LDR(R11, R14);
if (accessSize == 32)
{
// Swap in place, store, then swap back to restore RS
REV(RS, RS);
STR(RS, R10, R11);
REV(RS, RS);
}
else if (accessSize == 16)
{
REV16(RS, RS);
STRH(RS, R10, R11);
REV16(RS, RS);
}
else
{
STRB(RS, R10, R11);
}
ADD(R11, R11, accessSize >> 3);
STR(R11, R14);
jit->js.fifoBytesThisBlock += accessSize >> 3;
}
else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
{
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, jo.fastmem, true, RS);
}
else
{
// Constant address that is not optimizable RAM: always slowmem
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, false, false, RS);
}
}
else
{
EmitBackpatchRoutine(this, flags, jo.fastmem, true, RS);
}
}
// Emits code for the integer store family (stb/sth/stw, their update forms
// and the indexed X-forms). Decodes access size, update flag and index
// register from the opcode, emits the store through SafeStoreFromReg, and for
// update forms writes the effective address back to RA unless a DSI
// exception is pending.
void JitArm::stX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
u32 a = inst.RA, b = inst.RB, s = inst.RS;
s32 offset = inst.SIMM_16;
u32 accessSize = 0;
s32 regOffset = -1;
bool update = false;
// Each "update" case deliberately falls through into its non-update sibling
switch (inst.OPCD)
{
case 45: // sthu
update = true;
case 44: // sth
accessSize = 16;
break;
case 31:
switch (inst.SUBOP10)
{
case 183: // stwux
update = true;
case 151: // stwx
accessSize = 32;
regOffset = b;
break;
case 247: // stbux
update = true;
case 215: // stbx
accessSize = 8;
regOffset = b;
break;
case 439: // sthux
update = true;
case 407: // sthx
accessSize = 16;
regOffset = b;
break;
}
break;
case 37: // stwu
update = true;
case 36: // stw
accessSize = 32;
break;
case 39: // stbu
update = true;
case 38: // stb
accessSize = 8;
break;
}
// For non-update forms RA == 0 means "no base register" (-1)
SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
if (update)
{
ARMReg rA = gpr.GetReg();
// RB is only assigned (and only read) for the indexed forms
ARMReg RB;
ARMReg RA = gpr.R(a);
if (regOffset != -1)
RB = gpr.R(regOffset);
// Check for DSI exception prior to writing back address
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
TST(rA, EXCEPTION_DSI);
FixupBranch has_exception = B_CC(CC_NEQ);
if (regOffset == -1)
{
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
ADD(RA, RA, RB);
}
SetJumpTarget(has_exception);
gpr.Unlock(rA);
}
}
// Emits a load from guest memory into the host register `dest`.
//
// dest       - host register receiving the loaded value
// addr       - guest GPR index used as address base, or -1 for none
// offsetReg  - guest GPR index added to the base, or -1 to use `offset`
// accessSize - 32, 16, or anything else for an 8-bit load
// offset     - immediate displacement, only used when offsetReg == -1
// signExtend - adds BackPatchInfo::FLAG_EXTEND to the variant flags
// reverse    - adds BackPatchInfo::FLAG_REVERSE to the variant flags
// update     - when set, the effective address is copied to guest GPR `addr`
//
// The effective address always ends up in R12 before the access is emitted.
// NOTE(review): R12 is not passed to EmitBackpatchRoutine explicitly, so that
// routine presumably reads the address from R12 - confirm.
void JitArm::SafeLoadToReg(ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update)
{
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (offsetReg == -1)
{
// base (+ immediate displacement)
if (addr != -1)
{
if (gpr.IsImm(addr))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + offset;
}
else
{
// Use the displacement as an ARM immediate operand when it is encodable
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(addr));
}
}
}
else
{
// No base register: the displacement is the absolute address
is_immediate = true;
imm_addr = offset;
}
}
else
{
// base + index register; `offset` is ignored in this form
if (addr != -1)
{
if (gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg);
}
else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(addr), off))
{
ADD(rA, gpr.R(offsetReg), off);
}
else
{
MOVI2R(rA, gpr.GetImm(addr));
ADD(rA, rA, gpr.R(offsetReg));
}
}
else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(offsetReg), off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, gpr.GetImm(offsetReg));
ADD(rA, rA, gpr.R(addr));
}
}
else
{
ADD(rA, gpr.R(addr), gpr.R(offsetReg));
}
}
else
{
// Index register only
if (gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else
{
MOV(rA, gpr.R(offsetReg));
}
}
}
// A constant address is simply materialized; unlike the store path there is
// no special handling for constant addresses.
if (is_immediate)
MOVI2R(rA, imm_addr);
// Select the backpatch variant
u32 flags = BackPatchInfo::FLAG_LOAD;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (reverse)
flags |= BackPatchInfo::FLAG_REVERSE;
if (signExtend)
flags |= BackPatchInfo::FLAG_EXTEND;
EmitBackpatchRoutine(this, flags,
jo.fastmem,
true, dest);
// Update forms: write the effective address back to the base register
if (update)
MOV(gpr.R(addr), rA);
}
// Emits code for the integer load family (lbz/lhz/lha/lwz, their update forms,
// the indexed X-forms and the byte-reversed lwbrx/lhbrx).
//
// Decodes access size, update/sign-extend/reverse flags and the index
// register from the opcode, skips the load when a DSI exception is already
// pending, and recognizes the "lwz + cmp + beq -8" idle loop pattern.
void JitArm::lXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	u32 a = inst.RA, b = inst.RB, d = inst.RD;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 offsetReg = -1;
	bool update = false;
	bool signExtend = false;
	bool reverse = false;

	// Each "update" case deliberately falls through into its non-update sibling
	switch (inst.OPCD)
	{
	case 31:
		switch (inst.SUBOP10)
		{
		case 55: // lwzux
			update = true;
		case 23: // lwzx
			accessSize = 32;
			offsetReg = b;
			break;
		case 119: //lbzux
			update = true;
		case 87: // lbzx
			accessSize = 8;
			offsetReg = b;
			break;
		case 311: // lhzux
			update = true;
		case 279: // lhzx
			accessSize = 16;
			offsetReg = b;
			break;
		case 375: // lhaux
			update = true;
		case 343: // lhax
			accessSize = 16;
			signExtend = true;
			offsetReg = b;
			break;
		case 534: // lwbrx
			accessSize = 32;
			reverse = true;
			// X-form: EA = (RA|0) + RB. Without this the address was built
			// from SIMM_16, which is not an operand of this encoding.
			offsetReg = b;
			break;
		case 790: // lhbrx
			accessSize = 16;
			reverse = true;
			// X-form: EA = (RA|0) + RB, see lwbrx above.
			offsetReg = b;
			break;
		}
		break;
	case 33: // lwzu
		update = true;
	case 32: // lwz
		accessSize = 32;
		break;
	case 35: // lbzu
		update = true;
	case 34: // lbz
		accessSize = 8;
		break;
	case 41: // lhzu
		update = true;
	case 40: // lhz
		accessSize = 16;
		break;
	case 43: // lhau
		update = true;
	case 42: // lha
		signExtend = true;
		accessSize = 16;
		break;
	}

	// Check for exception before loading
	ARMReg rA = gpr.GetReg(false);
	ARMReg RD = gpr.R(d);

	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	TST(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_NEQ);

	// For non-update forms RA == 0 means "no base register" (-1)
	SafeLoadToReg(RD, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse, update);

	SetJumpTarget(DoNotLoad);

	// LWZ idle skipping
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
	    inst.OPCD == 32 &&
	    (inst.hex & 0xFFFF0000) == 0x800D0000 &&
	    (PowerPC::HostRead_U32(js.compilerPC + 4) == 0x28000000 ||
	    (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && PowerPC::HostRead_U32(js.compilerPC + 4) == 0x2C000000)) &&
	    PowerPC::HostRead_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		// if it's still 0, we can wait until the next event
		TST(RD, RD);
		FixupBranch noIdle = B_CC(CC_NEQ);

		gpr.Flush(FLUSH_MAINTAIN_STATE);
		fpr.Flush(FLUSH_MAINTAIN_STATE);

		rA = gpr.GetReg();

		MOVI2R(rA, (u32)&PowerPC::OnIdle);
		BL(rA);

		gpr.Unlock(rA);
		WriteExceptionExit();

		SetJumpTarget(noIdle);

		//js.compilerPC += 8;
		return;
	}
}
// Some games use this heavily in video codecs
// We make the assumption that this pulls from main RAM at /all/ times
//
// Compiles lmw: loads GPRs RD..31 from consecutive words starting at
// (RA|0) + SIMM_16. Only emitted when jo.fastmem is enabled; otherwise
// FALLBACK_IF takes over.
void JitArm::lmw(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
FALLBACK_IF(!jo.fastmem);
u32 a = inst.RA;
ARMReg rA = gpr.GetReg();
// Effective address: displacement, plus RA unless the RA field is 0.
MOVI2R(rA, inst.SIMM_16);
if (a)
ADD(rA, rA, gpr.R(a));
// Clear the masked address bits, then add R8.
// NOTE(review): R8 is presumably the host base of emulated memory - confirm.
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(rA, rA, mask);
ADD(rA, rA, R8);
// One word per register, at 4-byte strides from the computed address.
for (int i = inst.RD; i < 32; i++)
{
ARMReg RX = gpr.R(i);
LDR(RX, rA, (i - inst.RD) * 4);
// Byte-swap the loaded word in place.
REV(RX, RX);
}
gpr.Unlock(rA);
}
// Compiles stmw: stores GPRs RD..31 to consecutive words starting at
// (RA|0) + SIMM_16. Mirrors lmw and, like it, is only emitted when
// jo.fastmem is enabled.
void JitArm::stmw(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
FALLBACK_IF(!jo.fastmem);
u32 a = inst.RA;
// rA holds the address, rB the byte-swapped value being stored.
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
MOVI2R(rA, inst.SIMM_16);
if (a)
ADD(rA, rA, gpr.R(a));
// Clear the masked address bits, then add R8.
// NOTE(review): R8 is presumably the host base of emulated memory - confirm.
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(rA, rA, mask);
ADD(rA, rA, R8);
for (int i = inst.RD; i < 32; i++)
{
ARMReg RX = gpr.R(i);
// Swap into the scratch register so the cached guest value stays intact.
REV(rB, RX);
STR(rB, rA, (i - inst.RD) * 4);
}
gpr.Unlock(rA, rB);
}
// Compiles dcbst. No ARM code is emitted here: the instruction either goes
// through FALLBACK_IF or is compiled to nothing, depending on the word that
// precedes it in guest memory.
void JitArm::dcbst(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
// If the dcbst instruction is preceded by dcbt, it is flushing a prefetched
// memory location. Do not invalidate the JIT cache in this case as the memory
// will be the same.
// dcbt = 0x7c00022c
// The check reads the instruction word at compilerPC - 4 and falls back
// unless all bits of the dcbt pattern are set in it.
FALLBACK_IF((PowerPC::HostRead_U32(js.compilerPC - 4) & 0x7c00022c) != 0x7c00022c);
}
// Compiles icbi by handing the instruction to FallBackToInterpreter() and
// then writing a block exit to the following instruction (compilerPC + 4).
void JitArm::icbi(UGeckoInstruction inst)
{
FallBackToInterpreter(inst);
WriteExit(js.compilerPC + 4);
}

View File

@ -1,403 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Compiles the floating-point load family: lfs/lfd, their update (u) and
// indexed (x/ux) forms.
//
// The opcode is decoded into BackPatchInfo flags (F32 or F64), an update
// flag and an optional index register. The effective address is either
// folded into a compile-time constant (when every contributing GPR is a
// known immediate) or computed into R12. The load itself is emitted by
// EmitBackpatchRoutine().
void JitArm::lfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
ARMReg RA;
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_LOAD;
bool update = false;
// -1 means "no index register": the SIMM_16 displacement is used instead.
s32 offsetReg = -1;
switch (inst.OPCD)
{
case 31:
switch (inst.SUBOP10)
{
case 567: // lfsux
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
offsetReg = b;
break;
case 535: // lfsx
flags |= BackPatchInfo::FLAG_SIZE_F32;
offsetReg = b;
break;
case 631: // lfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offsetReg = b;
break;
case 599: // lfdx
flags |= BackPatchInfo::FLAG_SIZE_F64;
offsetReg = b;
break;
}
break;
case 49: // lfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
break;
case 48: // lfs
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 51: // lfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 50: // lfd
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
// Destination: the R0 half of FD always, plus the R1 half for F32 loads.
// Both are requested with the second argument false.
ARMReg v0 = fpr.R0(inst.FD, false), v1 = INVALID_REG;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
v1 = fpr.R1(inst.FD, false);
// Fixed scratch registers: R11 for the exception word, R12 for the address.
ARMReg rA = R11;
ARMReg addr = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (update)
{
// Always uses RA
if (gpr.IsImm(a) && offsetReg == -1)
{
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else
{
if (offsetReg == -1)
{
// Use the displacement as an immediate operand when it can be
// encoded; otherwise materialise it in addr first.
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
ADD(addr, gpr.R(offsetReg), gpr.R(a));
}
}
}
else
{
// Non-update forms: an RA field of 0 means "no base register".
if (offsetReg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else if (!a && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else if (a)
{
ADD(addr, gpr.R(a), gpr.R(offsetReg));
}
else
{
MOV(addr, gpr.R(offsetReg));
}
}
}
// Fetch RA's host register only after the address has been formed.
if (update)
RA = gpr.R(a);
if (is_immediate)
MOVI2R(addr, imm_addr);
// Skip both the RA write-back and the load when the exception word equals
// EXCEPTION_DSI.
// NOTE(review): lXX tests the DSI bit with TST, while this compares the
// whole word with CMP - confirm which is intended.
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
if (update)
MOV(RA, addr);
// The fourth argument is false only for a constant address that
// IsOptimizableRAMAddress() accepts.
EmitBackpatchRoutine(this, flags,
jo.fastmem,
!(is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)), v0, v1);
SetJumpTarget(DoNotLoad);
}
// Compiles the floating-point store family: stfs/stfd, their update (u) and
// indexed (x/ux) forms.
//
// The opcode is decoded into BackPatchInfo flags (F32 or F64), an update
// flag and an optional index register. The effective address is either
// folded into a compile-time constant (when every contributing GPR is a
// known immediate) or computed into R12. Constant addresses accepted by
// IsOptimizableGatherPipeWrite() are written straight into the gather pipe
// buffer; everything else is emitted through EmitBackpatchRoutine().
//
// Every update form (stfsu, stfdu, stfsux, stfdux) sets `update`, so RA is
// written back with the effective address.
void JitArm::stfXX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreFloatingOff);

    ARMReg RA;
    u32 a = inst.RA, b = inst.RB;
    s32 offset = inst.SIMM_16;
    u32 flags = BackPatchInfo::FLAG_STORE;
    bool update = false;
    // -1 means "no index register": the SIMM_16 displacement is used instead.
    s32 offsetReg = -1;

    switch (inst.OPCD)
    {
    case 31:
        switch (inst.SUBOP10)
        {
        case 663: // stfsx
            flags |= BackPatchInfo::FLAG_SIZE_F32;
            offsetReg = b;
            break;
        case 695: // stfsux
            flags |= BackPatchInfo::FLAG_SIZE_F32;
            // Update form: RA must receive the effective address, exactly
            // as for stfdux below.
            update = true;
            offsetReg = b;
            break;
        case 727: // stfdx
            flags |= BackPatchInfo::FLAG_SIZE_F64;
            offsetReg = b;
            break;
        case 759: // stfdux
            flags |= BackPatchInfo::FLAG_SIZE_F64;
            update = true;
            offsetReg = b;
            break;
        }
        break;
    case 53: // stfsu
        flags |= BackPatchInfo::FLAG_SIZE_F32;
        update = true;
        break;
    case 52: // stfs
        flags |= BackPatchInfo::FLAG_SIZE_F32;
        break;
    case 55: // stfdu
        flags |= BackPatchInfo::FLAG_SIZE_F64;
        update = true;
        break;
    case 54: // stfd
        flags |= BackPatchInfo::FLAG_SIZE_F64;
        break;
    }

    // Source value, plus the fixed scratch registers: R11 for the exception
    // word, R12 for the address.
    ARMReg v0 = fpr.R0(inst.FS);
    ARMReg rA = R11;
    ARMReg addr = R12;

    u32 imm_addr = 0;
    bool is_immediate = false;

    if (update)
    {
        // Always uses RA
        if (gpr.IsImm(a) && offsetReg == -1)
        {
            is_immediate = true;
            imm_addr = offset + gpr.GetImm(a);
        }
        else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
        {
            is_immediate = true;
            imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
        }
        else
        {
            if (offsetReg == -1)
            {
                // Use the displacement as an immediate operand when it can
                // be encoded; otherwise materialise it in addr first.
                Operand2 off;
                if (TryMakeOperand2(offset, off))
                {
                    ADD(addr, gpr.R(a), off);
                }
                else
                {
                    MOVI2R(addr, offset);
                    ADD(addr, addr, gpr.R(a));
                }
            }
            else
            {
                ADD(addr, gpr.R(offsetReg), gpr.R(a));
            }
        }
    }
    else
    {
        // Non-update forms: an RA field of 0 means "no base register".
        if (offsetReg == -1)
        {
            if (a && gpr.IsImm(a))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(a) + offset;
            }
            else if (a)
            {
                Operand2 off;
                if (TryMakeOperand2(offset, off))
                {
                    ADD(addr, gpr.R(a), off);
                }
                else
                {
                    MOVI2R(addr, offset);
                    ADD(addr, addr, gpr.R(a));
                }
            }
            else
            {
                is_immediate = true;
                imm_addr = offset;
            }
        }
        else
        {
            if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
            }
            else if (!a && gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(offsetReg);
            }
            else if (a)
            {
                ADD(addr, gpr.R(a), gpr.R(offsetReg));
            }
            else
            {
                MOV(addr, gpr.R(offsetReg));
            }
        }
    }

    if (is_immediate)
        MOVI2R(addr, imm_addr);

    if (update)
    {
        // Write the effective address back to RA, predicated on the
        // exception word not being equal to EXCEPTION_DSI.
        RA = gpr.R(a);
        LDR(rA, R9, PPCSTATE_OFF(Exceptions));
        CMP(rA, EXCEPTION_DSI);

        SetCC(CC_NEQ);
        MOV(RA, addr);
        SetCC();
    }

    if (is_immediate)
    {
        if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
        {
            int accessSize;
            if (flags & BackPatchInfo::FLAG_SIZE_F64)
                accessSize = 64;
            else
                accessSize = 32;

            // R14 -> gather pipe byte count, R10 -> current write position
            // (buffer base + count), R11 = count.
            MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
            MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
            LDR(R11, R14);
            ADD(R10, R10, R11);

            NEONXEmitter nemit(this);
            if (accessSize == 64)
            {
                // Byte-reverse the double and store it as two words;
                // R0/R1 are saved and restored around their use as
                // temporaries.
                PUSH(2, R0, R1);
                nemit.VREV64(I_8, D0, v0);
                VMOV(R0, D0);
                STR(R0, R10, 0);
                STR(R1, R10, 4);
                POP(2, R0, R1);
            }
            else if (accessSize == 32)
            {
                // Convert to single precision, byte-reverse, store one word.
                VCVT(S0, v0, 0);
                nemit.VREV32(I_8, D0, D0);
                VMOV(addr, S0);
                STR(addr, R10);
            }

            // Advance the stored byte count and the per-block bookkeeping.
            ADD(R11, R11, accessSize >> 3);
            STR(R11, R14);
            jit->js.fifoBytesThisBlock += accessSize >> 3;
        }
        else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
        {
            MOVI2R(addr, imm_addr);
            EmitBackpatchRoutine(this, flags, jo.fastmem, false, v0);
        }
        else
        {
            // Constant address outside optimizable RAM: fastmem is forced
            // off for this access.
            MOVI2R(addr, imm_addr);
            EmitBackpatchRoutine(this, flags, false, false, v0);
        }
    }
    else
    {
        EmitBackpatchRoutine(this, flags, jo.fastmem, true, v0);
    }
}

View File

@ -1,218 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Compiles psq_l / psq_lu (opcodes 56 / 57): a quantized paired-single load.
// The GQR selected by inst.I supplies the load type and scale, and the actual
// load is done by a routine fetched from asm_routines.pairedLoadQuantized.
// Falls back when jo.memcheck is set or jo.fastmem is clear.
void JitArm::psq_l(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// Register use in the emitted code, as set up below:
// R12 contains the load type, multiplied by 4 (index into the routine table)
// R11 contains the scale, shifted left by 3
// R10 is the ADDR
FALLBACK_IF(jo.memcheck || !jo.fastmem);
bool update = inst.OPCD == 57;
s32 offset = inst.SIMM_12;
LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
UBFX(R12, R11, 16, 3); // Type
LSL(R12, R12, 2);
UBFX(R11, R11, 24, 6); // Scale
LSL(R11, R11, 3);
// Effective address: RA + offset, or just the offset when RA is 0 and this
// is not the update form.
Operand2 off;
if (TryMakeOperand2(offset, off))
{
if (inst.RA || update)
ADD(R10, gpr.R(inst.RA), off);
else
MOV(R10, off);
}
else
{
MOVI2R(R10, (u32)offset);
if (inst.RA || update) // Always uses the register on update
ADD(R10, R10, gpr.R(inst.RA));
}
if (update)
MOV(gpr.R(inst.RA), R10);
// Pick the routine: table base + type * 4, with a further 8-entry
// (8 * 4 byte) offset when W is set.
MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized);
ADD(R14, R14, R12);
LDR(R14, R14, inst.W ? 8 * 4 : 0);
// Values returned in S0, S1
BL(R14); // Jump to the quantizer Load
ARMReg vD0 = fpr.R0(inst.RS, false);
ARMReg vD1 = fpr.R1(inst.RS, false);
VCVT(vD0, S0, 0);
// With W set only one value is loaded; the second half becomes 1.0.
if (!inst.W)
VCVT(vD1, S1, 0);
else
MOVI2F(vD1, 1.0f, INVALID_REG); // No need for temp reg with 1.0f
}
// Compiles psq_lx / psq_lux (SUBOP10 38 is the update form): the indexed
// quantized paired-single load. Uses the indexed-form fields inst.Ix and
// inst.Wx to select the GQR and the single/pair mode.
// Falls back when jo.memcheck is set or jo.fastmem is clear.
void JitArm::psq_lx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// Register use in the emitted code, as set up below:
// R12 contains the load type, multiplied by 4 (index into the routine table)
// R11 contains the scale, shifted left by 3
// R10 is the ADDR
FALLBACK_IF(jo.memcheck || !jo.fastmem);
bool update = inst.SUBOP10 == 38;
LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.Ix]));
UBFX(R12, R11, 16, 3); // Type
LSL(R12, R12, 2);
UBFX(R11, R11, 24, 6); // Scale
LSL(R11, R11, 3);
// Effective address: RA + RB, or just RB when RA is 0 and not updating.
if (inst.RA || update) // Always uses the register on update
{
ADD(R10, gpr.R(inst.RB), gpr.R(inst.RA));
}
else
{
MOV(R10, gpr.R(inst.RB));
}
if (update)
MOV(gpr.R(inst.RA), R10);
// Pick the routine: table base + type * 4, with a further 8-entry
// (8 * 4 byte) offset when Wx is set.
MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized);
ADD(R14, R14, R12);
LDR(R14, R14, inst.Wx ? 8 * 4 : 0);
// Values returned in S0, S1
BL(R14); // Jump to the quantizer Load
ARMReg vD0 = fpr.R0(inst.RS, false);
ARMReg vD1 = fpr.R1(inst.RS, false);
// Everything between SetCC(CC_NEQ) and SetCC() is emitted while the
// condition is NEQ, i.e. for an exception word different from EXCEPTION_DSI.
LDR(R14, R9, PPCSTATE_OFF(Exceptions));
CMP(R14, EXCEPTION_DSI);
SetCC(CC_NEQ);
VCVT(vD0, S0, 0);
if (!inst.Wx)
VCVT(vD1, S1, 0);
else
MOVI2F(vD1, 1.0f, INVALID_REG); // No need for temp reg with 1.0f
SetCC();
}
// Compiles psq_st / psq_stu (opcodes 60 / 61): a quantized paired-single
// store. The GQR selected by inst.I supplies the store type and scale, and
// the actual store is done by a routine fetched from
// asm_routines.pairedStoreQuantized.
// Falls back when jo.memcheck is set or jo.fastmem is clear.
void JitArm::psq_st(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// Register use in the emitted code, as set up below:
// R12 contains the store type, multiplied by 4 (index into the routine table)
// R11 contains the scale, shifted left by 3
// R10 is the ADDR
FALLBACK_IF(jo.memcheck || !jo.fastmem);
bool update = inst.OPCD == 61;
s32 offset = inst.SIMM_12;
// The store fields are taken from bits 0-2 (type) and 8-13 (scale) of the
// GQR, where the load variants use bits 16-18 and 24-29.
LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
UBFX(R12, R11, 0, 3); // Type
LSL(R12, R12, 2);
UBFX(R11, R11, 8, 6); // Scale
LSL(R11, R11, 3);
// Effective address: RA + offset, or just the offset when RA is 0 and this
// is not the update form.
Operand2 off;
if (TryMakeOperand2(offset, off))
{
if (inst.RA || update)
ADD(R10, gpr.R(inst.RA), off);
else
MOV(R10, off);
}
else
{
MOVI2R(R10, (u32)offset);
if (inst.RA || update) // Always uses the register on update
ADD(R10, R10, gpr.R(inst.RA));
}
if (update)
MOV(gpr.R(inst.RA), R10);
// Pick the routine: table base + type * 4, with a further 8-entry
// (8 * 4 byte) offset when W is set.
MOVI2R(R14, (u32)asm_routines.pairedStoreQuantized);
ADD(R14, R14, R12);
LDR(R14, R14, inst.W ? 8 * 4 : 0);
// Convert the source value(s) into S0 (and S1 for the pair form).
ARMReg vD0 = fpr.R0(inst.RS);
VCVT(S0, vD0, 0);
if (!inst.W)
{
ARMReg vD1 = fpr.R1(inst.RS);
VCVT(S1, vD1, 0);
}
// floats passed through D0
BL(R14); // Jump to the quantizer Store
}
// Compiles psq_stx / psq_stux (SUBOP10 39 is the update form): the indexed
// quantized paired-single store.
//
// The GQR index and the single/pair flag are read from the indexed-form
// fields inst.Ix / inst.Wx, matching psq_lx; inst.I / inst.W belong to the
// displacement forms (psq_st / psq_stu).
//
// Falls back when jo.memcheck is set or jo.fastmem is clear.
void JitArm::psq_stx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStorePairedOff);

    // Register use in the emitted code, as set up below:
    // R12 contains the store type, multiplied by 4 (index into the routine table)
    // R11 contains the scale, shifted left by 3
    // R10 is the ADDR
    FALLBACK_IF(jo.memcheck || !jo.fastmem);

    bool update = inst.SUBOP10 == 39;

    LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.Ix]));
    UBFX(R12, R11, 0, 3); // Type
    LSL(R12, R12, 2);
    UBFX(R11, R11, 8, 6); // Scale
    LSL(R11, R11, 3);

    // Effective address: RA + RB, or just RB when RA is 0 and not updating.
    if (inst.RA || update) // Always uses the register on update
    {
        ADD(R10, gpr.R(inst.RA), gpr.R(inst.RB));
    }
    else
    {
        MOV(R10, gpr.R(inst.RB));
    }

    if (update)
        MOV(gpr.R(inst.RA), R10);

    // Pick the routine: table base + type * 4, with a further 8-entry
    // (8 * 4 byte) offset when Wx is set.
    MOVI2R(R14, (u32)asm_routines.pairedStoreQuantized);
    ADD(R14, R14, R12);
    LDR(R14, R14, inst.Wx ? 8 * 4 : 0);

    // Convert the source value(s) into S0 (and S1 for the pair form).
    ARMReg vD0 = fpr.R0(inst.RS);
    VCVT(S0, vD0, 0);

    if (!inst.Wx)
    {
        ARMReg vD1 = fpr.R1(inst.RS);
        VCVT(S1, vD1, 0);
    }

    // floats passed through D0
    BL(R14); // Jump to the quantizer Store
}

View File

@ -1,618 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_FPUtils.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Compiles ps_rsqrte (paired reciprocal square-root estimate).
// NOTE: the unconditional FALLBACK_IF(true) below means the emitter code
// after it is presumably never reached (assuming FALLBACK_IF returns from
// the function when its condition holds - confirm against the macro).
void JitArm::ps_rsqrte(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(true);
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
ARMReg vB0 = fpr.R0(b);
ARMReg vB1 = fpr.R1(b);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
ARMReg fpscrReg = gpr.GetReg();
ARMReg V0 = D1;
ARMReg rA = gpr.GetReg();
// V0 <- value stored at PPC_NAN; fpscrReg is used as the pointer first and
// then reloaded with the guest FPSCR.
MOVI2R(fpscrReg, (u32)&PPC_NAN);
VLDR(V0, fpscrReg, 0);
LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
// First half: on the not-LT path the result is set to V0 and FPSCR_VXSQRT
// is raised; on the LT path bit 0 of rA is set when the flags are EQ.
// NOTE(review): no instruction emitted in this function clears rA before it
// is ORed into - confirm whether it should be zeroed first.
VCMP(vB0);
VMRS(_PC);
FixupBranch Less0 = B_CC(CC_LT);
VMOV(vD0, V0);
SetFPException(fpscrReg, FPSCR_VXSQRT);
FixupBranch SkipOrr0 = B();
SetJumpTarget(Less0);
SetCC(CC_EQ);
ORR(rA, rA, 1);
SetCC();
SetJumpTarget(SkipOrr0);
// Second half: same sequence, recording into bit 1 of rA.
VCMP(vB1);
VMRS(_PC);
FixupBranch Less1 = B_CC(CC_LT);
VMOV(vD1, V0);
SetFPException(fpscrReg, FPSCR_VXSQRT);
FixupBranch SkipOrr1 = B();
SetJumpTarget(Less1);
SetCC(CC_EQ);
ORR(rA, rA, 2);
SetCC();
SetJumpTarget(SkipOrr1);
// Raise FPSCR_ZX when either bit was recorded.
CMP(rA, 0);
FixupBranch noException = B_CC(CC_EQ);
SetFPException(fpscrReg, FPSCR_ZX);
SetJumpTarget(noException);
// Convert both inputs into S0/S1, run the NEON estimate on D0, and convert
// the two results back into the destination halves.
VCVT(S0, vB0, 0);
VCVT(S1, vB1, 0);
NEONXEmitter nemit(this);
nemit.VRSQRTE(F_32, D0, D0);
VCVT(vD0, S0, 0);
VCVT(vD1, S1, 0);
// Write the (possibly updated) FPSCR back to the guest state.
STR(fpscrReg, R9, PPCSTATE_OFF(fpscr));
gpr.Unlock(fpscrReg, rA);
}
// Compiles ps_sel: for each half independently, FD receives FC when the
// comparison on FA yields GE and FB otherwise.
// NOTE(review): the single-operand VCMP is presumably a compare against
// zero - confirm against the emitter.
void JitArm::ps_sel(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
ARMReg vA0 = fpr.R0(a);
ARMReg vA1 = fpr.R1(a);
ARMReg vB0 = fpr.R0(b);
ARMReg vB1 = fpr.R1(b);
ARMReg vC0 = fpr.R0(c);
ARMReg vC1 = fpr.R1(c);
ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d, false);
// R0 half: fall-through path picks B, the GE branch target picks C.
VCMP(vA0);
VMRS(_PC);
FixupBranch GT0 = B_CC(CC_GE);
VMOV(vD0, vB0);
FixupBranch EQ0 = B();
SetJumpTarget(GT0);
VMOV(vD0, vC0);
SetJumpTarget(EQ0);
// R1 half: identical sequence on the second elements.
VCMP(vA1);
VMRS(_PC);
FixupBranch GT1 = B_CC(CC_GE);
VMOV(vD1, vB1);
FixupBranch EQ1 = B();
SetJumpTarget(GT1);
VMOV(vD1, vC1);
SetJumpTarget(EQ1);
}
// ps_add: each half of FD becomes the sum of the matching halves of FA and
// FB. Record-form (Rc) encodings are routed through FALLBACK_IF.
void JitArm::ps_add(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg lhs0 = fpr.R0(srcA);
    const ARMReg lhs1 = fpr.R1(srcA);
    const ARMReg rhs0 = fpr.R0(srcB);
    const ARMReg rhs1 = fpr.R1(srcB);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VADD(out0, lhs0, rhs0);
    VADD(out1, lhs1, rhs1);
}
// ps_div: each half of FD becomes the matching half of FA divided by the
// matching half of FB. Record-form (Rc) encodings are routed through
// FALLBACK_IF.
void JitArm::ps_div(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg num0 = fpr.R0(srcA);
    const ARMReg num1 = fpr.R1(srcA);
    const ARMReg den0 = fpr.R0(srcB);
    const ARMReg den1 = fpr.R1(srcB);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VDIV(out0, num0, den0);
    VDIV(out1, num1, den1);
}
// Compiles ps_res: each half of FD becomes 1.0 divided by the matching half
// of FB. Record-form (Rc) encodings are routed through FALLBACK_IF.
void JitArm::ps_res(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 b = inst.FB, d = inst.FD;

    ARMReg vB0 = fpr.R0(b);
    ARMReg vB1 = fpr.R1(b);
    ARMReg vD0 = fpr.R0(d, false);
    ARMReg vD1 = fpr.R1(d, false);
    ARMReg V0 = fpr.GetReg();

    // V0 comes from the FPR cache, so the constant has to be loaded with the
    // floating-point immediate helper (the same call psq_l uses for 1.0f),
    // not the integer MOVI2R.
    MOVI2F(V0, 1.0f, INVALID_REG); // temp reg not needed for 1.0

    VDIV(vD0, V0, vB0);
    VDIV(vD1, V0, vB1);

    fpr.Unlock(V0);
}
// ps_nmadd: per half, FD = -((FA * FC) + FB). The products go through two
// temporaries from the FPR cache before the add and negate.
void JitArm::ps_nmadd(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, B, C, destination, then the two temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg b0 = fpr.R0(srcB);
    const ARMReg b1 = fpr.R1(srcB);
    const ARMReg c0 = fpr.R0(srcC);
    const ARMReg c1 = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, c0);
    VMUL(prod1, a1, c1);
    VADD(out0, prod0, b0);
    VADD(out1, prod1, b1);
    VNEG(out0, out0);
    VNEG(out1, out1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_madd: per half, FD = (FA * FC) + FB. The products go through two
// temporaries from the FPR cache before the add.
void JitArm::ps_madd(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, B, C, destination, then the two temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg b0 = fpr.R0(srcB);
    const ARMReg b1 = fpr.R1(srcB);
    const ARMReg c0 = fpr.R0(srcC);
    const ARMReg c1 = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, c0);
    VMUL(prod1, a1, c1);
    VADD(out0, prod0, b0);
    VADD(out1, prod1, b1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_nmsub: per half, FD = -((FA * FC) - FB). The products go through two
// temporaries from the FPR cache before the subtract and negate.
void JitArm::ps_nmsub(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, B, C, destination, then the two temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg b0 = fpr.R0(srcB);
    const ARMReg b1 = fpr.R1(srcB);
    const ARMReg c0 = fpr.R0(srcC);
    const ARMReg c1 = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, c0);
    VMUL(prod1, a1, c1);
    VSUB(out0, prod0, b0);
    VSUB(out1, prod1, b1);
    VNEG(out0, out0);
    VNEG(out1, out1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_msub: per half, FD = (FA * FC) - FB. The products go through two
// temporaries from the FPR cache before the subtract.
void JitArm::ps_msub(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, B, C, destination, then the two temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg b0 = fpr.R0(srcB);
    const ARMReg b1 = fpr.R1(srcB);
    const ARMReg c0 = fpr.R0(srcC);
    const ARMReg c1 = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, c0);
    VMUL(prod1, a1, c1);
    VSUB(out0, prod0, b0);
    VSUB(out1, prod1, b1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_madds0: both halves of FA are multiplied by the R0 half of FC, then the
// matching half of FB is added. Products go through two FPR-cache temps.
void JitArm::ps_madds0(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, B, the C scalar, destination, then temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg b0 = fpr.R0(srcB);
    const ARMReg b1 = fpr.R1(srcB);
    const ARMReg scalar = fpr.R0(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, scalar);
    VMUL(prod1, a1, scalar);
    VADD(out0, prod0, b0);
    VADD(out1, prod1, b1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_madds1: both halves of FA are multiplied by the R1 half of FC, then the
// matching half of FB is added. Products go through two FPR-cache temps.
void JitArm::ps_madds1(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, B, the C scalar, destination, then temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg b0 = fpr.R0(srcB);
    const ARMReg b1 = fpr.R1(srcB);
    const ARMReg scalar = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, scalar);
    VMUL(prod1, a1, scalar);
    VADD(out0, prod0, b0);
    VADD(out1, prod1, b1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_sum0: the R0 half of FD becomes FA.R0 + FB.R1, and the R1 half of FD is
// copied from FC.R1.
void JitArm::ps_sum0(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg addend0 = fpr.R0(srcA);
    const ARMReg addend1 = fpr.R1(srcB);
    const ARMReg passthrough = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VADD(out0, addend0, addend1);
    VMOV(out1, passthrough);
}
// Compiles ps_sum1: the R1 half of FD becomes FA.R0 + FB.R1, and the R0 half
// of FD is copied from FC.R0. Record-form (Rc) encodings are routed through
// FALLBACK_IF.
void JitArm::ps_sum1(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

    ARMReg vA0 = fpr.R0(a);
    ARMReg vB1 = fpr.R1(b);
    ARMReg vC0 = fpr.R0(c);
    ARMReg vD0 = fpr.R0(d, false);
    ARMReg vD1 = fpr.R1(d, false);

    // The sum must be emitted before the copy: when FD and FA are the same
    // guest register, vD0 and vA0 alias, and writing vD0 first would destroy
    // the FA.R0 operand. Writing vD1 first is safe because neither vA0 nor
    // vC0 is an R1 half.
    VADD(vD1, vA0, vB1);
    VMOV(vD0, vC0);
}
// ps_sub: each half of FD becomes the matching half of FA minus the matching
// half of FB. Record-form (Rc) encodings are routed through FALLBACK_IF.
void JitArm::ps_sub(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg lhs0 = fpr.R0(srcA);
    const ARMReg lhs1 = fpr.R1(srcA);
    const ARMReg rhs0 = fpr.R0(srcB);
    const ARMReg rhs1 = fpr.R1(srcB);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VSUB(out0, lhs0, rhs0);
    VSUB(out1, lhs1, rhs1);
}
// ps_mul: each half of FD becomes the product of the matching halves of FA
// and FC. Record-form (Rc) encodings are routed through FALLBACK_IF.
void JitArm::ps_mul(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg lhs0 = fpr.R0(srcA);
    const ARMReg lhs1 = fpr.R1(srcA);
    const ARMReg rhs0 = fpr.R0(srcC);
    const ARMReg rhs1 = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VMUL(out0, lhs0, rhs0);
    VMUL(out1, lhs1, rhs1);
}
// ps_muls0: both halves of FA are multiplied by the R0 half of FC. The
// products are formed in two FPR-cache temporaries and then moved into FD.
void JitArm::ps_muls0(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, the C scalar, destination, then temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg scalar = fpr.R0(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, scalar);
    VMUL(prod1, a1, scalar);
    VMOV(out0, prod0);
    VMOV(out1, prod1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_muls1: both halves of FA are multiplied by the R1 half of FC. The
// products are formed in two FPR-cache temporaries and then moved into FD.
void JitArm::ps_muls1(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcC = inst.FC;
    const u32 dst = inst.FD;

    // Register-cache requests: A, the C scalar, destination, then temps.
    const ARMReg a0 = fpr.R0(srcA);
    const ARMReg a1 = fpr.R1(srcA);
    const ARMReg scalar = fpr.R1(srcC);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg prod0 = fpr.GetReg();
    const ARMReg prod1 = fpr.GetReg();

    VMUL(prod0, a0, scalar);
    VMUL(prod1, a1, scalar);
    VMOV(out0, prod0);
    VMOV(out1, prod1);

    fpr.Unlock(prod0);
    fpr.Unlock(prod1);
}
// ps_merge00: FD.R0 <- FA.R0 and FD.R1 <- FB.R0. The R1 half is written
// first, so FB.R0 is read before FD.R0 is overwritten.
void JitArm::ps_merge00(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg fromA = fpr.R0(srcA);
    const ARMReg fromB = fpr.R0(srcB);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VMOV(out1, fromB);
    VMOV(out0, fromA);
}
// ps_merge01: FD.R0 <- FA.R0 and FD.R1 <- FB.R1.
void JitArm::ps_merge01(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg fromA = fpr.R0(srcA);
    const ARMReg fromB = fpr.R1(srcB);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VMOV(out0, fromA);
    VMOV(out1, fromB);
}
// ps_merge10: FD.R0 <- FA.R1 and FD.R1 <- FB.R0. FB.R0 is saved to an
// FPR-cache temporary before FD.R0 is written.
void JitArm::ps_merge10(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: sources, destination halves, then the temp.
    const ARMReg fromA = fpr.R1(srcA);
    const ARMReg fromB = fpr.R0(srcB);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);
    const ARMReg saved = fpr.GetReg();

    VMOV(saved, fromB);
    VMOV(out0, fromA);
    VMOV(out1, saved);

    fpr.Unlock(saved);
}
// ps_merge11: FD.R0 <- FA.R1 and FD.R1 <- FB.R1.
void JitArm::ps_merge11(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 srcA = inst.FA;
    const u32 srcB = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: sources first, destination halves last.
    const ARMReg fromA = fpr.R1(srcA);
    const ARMReg fromB = fpr.R1(srcB);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VMOV(out0, fromA);
    VMOV(out1, fromB);
}
// ps_mr: copies both halves of FB into FD.
void JitArm::ps_mr(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 src = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: source halves first, destination halves last.
    const ARMReg in0 = fpr.R0(src);
    const ARMReg in1 = fpr.R1(src);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VMOV(out0, in0);
    VMOV(out1, in1);
}
// ps_neg: each half of FD becomes the negated matching half of FB.
void JitArm::ps_neg(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 src = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: source halves first, destination halves last.
    const ARMReg in0 = fpr.R0(src);
    const ARMReg in1 = fpr.R1(src);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VNEG(out0, in0);
    VNEG(out1, in1);
}
// ps_abs: each half of FD becomes the absolute value of the matching half
// of FB.
void JitArm::ps_abs(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 src = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: source halves first, destination halves last.
    const ARMReg in0 = fpr.R0(src);
    const ARMReg in1 = fpr.R1(src);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VABS(out0, in0);
    VABS(out1, in1);
}
// ps_nabs: each half of FD becomes the negated absolute value of the
// matching half of FB (VABS followed by VNEG, one half at a time).
void JitArm::ps_nabs(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITPairedOff);
    FALLBACK_IF(inst.Rc);

    const u32 src = inst.FB;
    const u32 dst = inst.FD;

    // Register-cache requests: source halves first, destination halves last.
    const ARMReg in0 = fpr.R0(src);
    const ARMReg in1 = fpr.R1(src);
    const ARMReg out0 = fpr.R0(dst, false);
    const ARMReg out1 = fpr.R1(dst, false);

    VABS(out0, in0);
    VNEG(out0, out0);

    VABS(out1, in1);
    VNEG(out1, out1);
}

View File

@ -1,217 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Emits a test of one condition-register bit and a conditional branch on it.
//
// field        index into ppcState.cr_val; each entry is accessed as two
//              32-bit words (offset 0 and offset sizeof(u32)).
// bit          one of CR_SO_BIT, CR_EQ_BIT, CR_GT_BIT, CR_LT_BIT.
// jump_if_set  true: branch taken when the bit is set; false: when clear.
//
// Returns the FixupBranch for the emitted branch; the caller is expected to
// resolve it with SetJumpTarget(). For any other `bit` value the assert
// fires and the returned FixupBranch is whatever default construction left.
FixupBranch JitArm::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
ARMReg RA = gpr.GetReg();
// NOTE(review): the case comments below name bits 61 and 62 of the 64-bit
// value (bits 29 and 30 of the upper word), which would be 0x20000000 and
// 0x40000000; the hex values in these trailing comments differ. Confirm the
// real constants against Operand2's (imm, rotation) encoding.
Operand2 SOBit(2, 2); // 0x10000000
Operand2 LTBit(1, 1); // 0x80000000
FixupBranch branch;
switch (bit)
{
case CR_SO_BIT: // check bit 61 set
// Tests the upper word against SOBit.
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
TST(RA, SOBit);
branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ);
break;
case CR_EQ_BIT: // check bits 31-0 == 0
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]));
CMP(RA, 0);
branch = B_CC(jump_if_set ? CC_EQ : CC_NEQ);
break;
case CR_GT_BIT: // check val > 0
// Two-word comparison against 1: CMP on the lower word, then SBCS on the
// upper word carries the borrow, so GE means the whole value is >= 1.
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]));
CMP(RA, 1);
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
SBCS(RA, RA, 0);
branch = B_CC(jump_if_set ? CC_GE : CC_LT);
break;
case CR_LT_BIT: // check bit 62 set
// Tests the upper word against LTBit.
LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32));
TST(RA, LTBit);
branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ);
break;
default:
_assert_msg_(DYNA_REC, false, "Invalid CR bit");
}
gpr.Unlock(RA);
return branch;
}
// Compiles mtspr: stores GPR RD into the special-purpose register selected by
// the instruction. SPRs not listed in the switch go through FALLBACK_IF.
void JitArm::mtspr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// The SPR number is split across two 5-bit instruction fields.
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex)
{
case SPR_DMAU:
case SPR_SPRG0:
case SPR_SPRG1:
case SPR_SPRG2:
case SPR_SPRG3:
case SPR_SRR0:
case SPR_SRR1:
// These are safe to do the easy way, see the bottom of this function.
break;
case SPR_LR:
case SPR_CTR:
case SPR_GQR0:
case SPR_GQR0 + 1:
case SPR_GQR0 + 2:
case SPR_GQR0 + 3:
case SPR_GQR0 + 4:
case SPR_GQR0 + 5:
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
// These are safe to do the easy way, see the bottom of this function.
break;
case SPR_XER:
{
// XER is also kept split across three ppcState fields: the masked low
// half-word (xer_stringctrl), the carry bit (xer_ca) and the bits from
// XER_OV_SHIFT upwards (xer_so_ov).
ARMReg RD = gpr.R(inst.RD);
ARMReg tmp = gpr.GetReg();
ARMReg mask = gpr.GetReg();
MOVI2R(mask, 0xFF7F);
AND(tmp, RD, mask);
STRH(tmp, R9, PPCSTATE_OFF(xer_stringctrl));
LSR(tmp, RD, XER_CA_SHIFT);
AND(tmp, tmp, 1);
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LSR(tmp, RD, XER_OV_SHIFT);
STRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
gpr.Unlock(tmp, mask);
}
// After the split fields are written, execution continues to the plain
// store at the bottom as well.
break;
default:
FALLBACK_IF(true);
}
// OK, this is easy.
ARMReg RD = gpr.R(inst.RD);
STR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4);
}
// Compiles mftb by delegating to mfspr(), after applying the same
// INSTRUCTION_START / JITDISABLE prologue that mfspr() itself repeats.
void JitArm::mftb(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
mfspr(inst);
}
// Compiles mfspr: loads the selected special-purpose register into GPR RD.
// XER is reassembled from its split ppcState fields; WPAR, DEC, TL and TU go
// through FALLBACK_IF; every other SPR is a plain load from ppcState.spr.
void JitArm::mfspr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// The SPR number is split across two 5-bit instruction fields.
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex)
{
case SPR_XER:
{
// RD = xer_stringctrl | (xer_ca << XER_CA_SHIFT) | (xer_so_ov << XER_OV_SHIFT)
gpr.BindToRegister(inst.RD, false);
ARMReg RD = gpr.R(inst.RD);
ARMReg tmp = gpr.GetReg();
LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl));
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LSL(tmp, tmp, XER_CA_SHIFT);
ORR(RD, RD, tmp);
LDRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
LSL(tmp, tmp, XER_OV_SHIFT);
ORR(RD, RD, tmp);
gpr.Unlock(tmp);
}
break;
case SPR_WPAR:
case SPR_DEC:
case SPR_TL:
case SPR_TU:
// NOTE(review): there is no break here; this relies on FALLBACK_IF leaving
// the function when its condition is true - confirm against the macro.
FALLBACK_IF(true);
default:
gpr.BindToRegister(inst.RD, false);
ARMReg RD = gpr.R(inst.RD);
LDR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4);
break;
}
}
// Compiles mtsr: stores GPR RS into ppcState.sr[inst.SR]. The segment
// register index is a compile-time constant taken from the instruction.
void JitArm::mtsr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
STR(gpr.R(inst.RS), R9, PPCSTATE_OFF(sr[inst.SR]));
}
// Compiles mfsr: loads ppcState.sr[inst.SR] into GPR RD.
void JitArm::mfsr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// Bind RD to a host register before using it as the load destination.
gpr.BindToRegister(inst.RD, false);
LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(sr[inst.SR]));
}
// Compiles mtmsr: stores GPR RS into ppcState.msr, flushes both register
// caches and ends the block with an exit to the next instruction.
void JitArm::mtmsr(UGeckoInstruction inst)
{
INSTRUCTION_START
// Don't interpret this, if we do we get thrown out
//JITDISABLE(bJITSystemRegistersOff);
STR(gpr.R(inst.RS), R9, PPCSTATE_OFF(msr));
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4);
}
// Compiles mfmsr: loads ppcState.msr into GPR RD.
void JitArm::mfmsr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// Bind RD to a host register before using it as the load destination.
gpr.BindToRegister(inst.RD, false);
LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(msr));
}
// Compiles mcrf: copies condition-register field CRFS to CRFD. Each field is
// copied as two 32-bit words; nothing is emitted when source and destination
// are the same field.
void JitArm::mcrf(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// The scratch register is taken (and released below) even when no copy is
// emitted.
ARMReg rA = gpr.GetReg();
if (inst.CRFS != inst.CRFD)
{
LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS]));
STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD]));
LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS]) + sizeof(u32));
STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD]) + sizeof(u32));
}
gpr.Unlock(rA);
}

View File

@ -1,483 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_Tables.h"
// Should be moved into the Jit class
// Pointer-to-member type for a JitArm instruction compiler.
typedef void (JitArm::*_Instruction) (UGeckoInstruction instCode);
// Dispatch tables of instruction compilers. dynaOpTable has one slot per
// 6-bit primary opcode; the numbered tables are indexed by the DynaRunTableN
// functions below with SUBOP10 (1024 slots) or SUBOP5 (32 slots).
static _Instruction dynaOpTable[64];
static _Instruction dynaOpTable4[1024];
static _Instruction dynaOpTable19[1024];
static _Instruction dynaOpTable31[1024];
static _Instruction dynaOpTable59[32];
static _Instruction dynaOpTable63[1024];
// Second-level dispatchers: each looks up the compiler for the instruction's
// extended opcode in the matching table and invokes it on this JitArm.

// Primary opcode 4, indexed by SUBOP10.
void JitArm::DynaRunTable4(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable4[_inst.SUBOP10];
    (this->*handler)(_inst);
}

// Primary opcode 19, indexed by SUBOP10.
void JitArm::DynaRunTable19(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable19[_inst.SUBOP10];
    (this->*handler)(_inst);
}

// Primary opcode 31, indexed by SUBOP10.
void JitArm::DynaRunTable31(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable31[_inst.SUBOP10];
    (this->*handler)(_inst);
}

// Primary opcode 59, indexed by SUBOP5.
void JitArm::DynaRunTable59(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable59[_inst.SUBOP5];
    (this->*handler)(_inst);
}

// Primary opcode 63, indexed by SUBOP10.
void JitArm::DynaRunTable63(UGeckoInstruction _inst)
{
    const _Instruction handler = dynaOpTable63[_inst.SUBOP10];
    (this->*handler)(_inst);
}
// One row of the static opcode tables below: an opcode (or extended opcode)
// number paired with the JitArm member function that compiles it.
struct GekkoOPTemplate
{
// Primary or extended opcode value, depending on which table the row is in.
int opcode;
// Compiler for that opcode.
_Instruction Inst;
//GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out
};
// Compilers keyed by 6-bit primary opcode. The first five rows route to the
// second-level dispatchers for opcodes 4, 19, 31, 59 and 63; the rest map an
// opcode directly to its compiler, with several opcodes sharing one function.
// NOTE(review): presumably copied into dynaOpTable by an init routine that is
// outside this chunk - confirm.
static GekkoOPTemplate primarytable[] =
{
{4, &JitArm::DynaRunTable4}, // RunTable4
{19, &JitArm::DynaRunTable19}, // RunTable19
{31, &JitArm::DynaRunTable31}, // RunTable31
{59, &JitArm::DynaRunTable59}, // RunTable59
{63, &JitArm::DynaRunTable63}, // RunTable63
{16, &JitArm::bcx}, // bcx
{18, &JitArm::bx}, // bx
{3, &JitArm::twx}, // twi
{17, &JitArm::sc}, // sc
{7, &JitArm::arith}, // mulli
{8, &JitArm::subfic}, // subfic
{10, &JitArm::cmpli}, // cmpli
{11, &JitArm::cmpi}, // cmpi
{12, &JitArm::arith}, // addic
{13, &JitArm::arith}, // addic_rc
{14, &JitArm::arith}, // addi
{15, &JitArm::arith}, // addis
{20, &JitArm::rlwimix}, // rlwimix
{21, &JitArm::rlwinmx}, // rlwinmx
{23, &JitArm::rlwnmx}, // rlwnmx
{24, &JitArm::arith}, // ori
{25, &JitArm::arith}, // oris
{26, &JitArm::arith}, // xori
{27, &JitArm::arith}, // xoris
{28, &JitArm::arith}, // andi_rc
{29, &JitArm::arith}, // andis_rc
{32, &JitArm::lXX}, // lwz
{33, &JitArm::lXX}, // lwzu
{34, &JitArm::lXX}, // lbz
{35, &JitArm::lXX}, // lbzu
{40, &JitArm::lXX}, // lhz
{41, &JitArm::lXX}, // lhzu
{42, &JitArm::lXX}, // lha
{43, &JitArm::lXX}, // lhau
{44, &JitArm::stX}, // sth
{45, &JitArm::stX}, // sthu
{36, &JitArm::stX}, // stw
{37, &JitArm::stX}, // stwu
{38, &JitArm::stX}, // stb
{39, &JitArm::stX}, // stbu
{46, &JitArm::lmw}, // lmw
{47, &JitArm::stmw}, // stmw
{48, &JitArm::lfXX}, // lfs
{49, &JitArm::lfXX}, // lfsu
{50, &JitArm::lfXX}, // lfd
{51, &JitArm::lfXX}, // lfdu
{52, &JitArm::stfXX}, // stfs
{53, &JitArm::stfXX}, // stfsu
{54, &JitArm::stfXX}, // stfd
{55, &JitArm::stfXX}, // stfdu
{56, &JitArm::psq_l}, // psq_l
{57, &JitArm::psq_l}, // psq_lu
{60, &JitArm::psq_st}, // psq_st
{61, &JitArm::psq_st}, // psq_stu
//missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58
};
// Exact-index entries for dynaOpTable4. InitTables() installs these after the
// table4_2 and table4_3 fills, so a row here wins over a replicated entry that
// lands on the same index.
static GekkoOPTemplate table4[] =
{ //SUBOP10
{0, &JitArm::FallBackToInterpreter}, // ps_cmpu0
{32, &JitArm::FallBackToInterpreter}, // ps_cmpo0
{40, &JitArm::ps_neg}, // ps_neg
{136, &JitArm::ps_nabs}, // ps_nabs
{264, &JitArm::ps_abs}, // ps_abs
{64, &JitArm::FallBackToInterpreter}, // ps_cmpu1
{72, &JitArm::ps_mr}, // ps_mr
{96, &JitArm::FallBackToInterpreter}, // ps_cmpo1
{528, &JitArm::ps_merge00}, // ps_merge00
{560, &JitArm::ps_merge01}, // ps_merge01
{592, &JitArm::ps_merge10}, // ps_merge10
{624, &JitArm::ps_merge11}, // ps_merge11
{1014, &JitArm::FallBackToInterpreter}, // dcbz_l
};
// Entries for dynaOpTable4 keyed by the low 5 bits of the index: InitTables()
// copies every row into all 32 slots of the form (i << 5) + opcode.
static GekkoOPTemplate table4_2[] =
{
{10, &JitArm::ps_sum0}, // ps_sum0
{11, &JitArm::ps_sum1}, // ps_sum1
{12, &JitArm::ps_muls0}, // ps_muls0
{13, &JitArm::ps_muls1}, // ps_muls1
{14, &JitArm::ps_madds0}, // ps_madds0
{15, &JitArm::ps_madds1}, // ps_madds1
{18, &JitArm::ps_div}, // ps_div
{20, &JitArm::ps_sub}, // ps_sub
{21, &JitArm::ps_add}, // ps_add
{23, &JitArm::ps_sel}, // ps_sel
{24, &JitArm::ps_res}, // ps_res
{25, &JitArm::ps_mul}, // ps_mul
{26, &JitArm::ps_rsqrte}, // ps_rsqrte
{28, &JitArm::ps_msub}, // ps_msub
{29, &JitArm::ps_madd}, // ps_madd
{30, &JitArm::ps_nmsub}, // ps_nmsub
{31, &JitArm::ps_nmadd}, // ps_nmadd
};
// Entries for dynaOpTable4 keyed by the low 6 bits of the index: InitTables()
// copies every row into all 16 slots of the form (i << 6) + opcode.
static GekkoOPTemplate table4_3[] =
{
{6, &JitArm::psq_lx}, // psq_lx
{7, &JitArm::psq_stx}, // psq_stx
{38, &JitArm::psq_lx}, // psq_lux
{39, &JitArm::psq_stx}, // psq_stux
};
// Entries for dynaOpTable19; InitTables() installs each row at
// dynaOpTable19[opcode]. Unlisted indices keep the FallBackToInterpreter default.
static GekkoOPTemplate table19[] =
{
{528, &JitArm::bcctrx}, // bcctrx
{16, &JitArm::bclrx}, // bclrx
{257, &JitArm::FallBackToInterpreter}, // crand
{129, &JitArm::FallBackToInterpreter}, // crandc
{289, &JitArm::FallBackToInterpreter}, // creqv
{225, &JitArm::FallBackToInterpreter}, // crnand
{33, &JitArm::FallBackToInterpreter}, // crnor
{449, &JitArm::FallBackToInterpreter}, // cror
{417, &JitArm::FallBackToInterpreter}, // crorc
{193, &JitArm::FallBackToInterpreter}, // crxor
{150, &JitArm::DoNothing}, // isync
{0, &JitArm::mcrf}, // mcrf
{50, &JitArm::rfi}, // rfi
{18, &JitArm::Break}, // rfid
};
// Entries for dynaOpTable31; InitTables() installs each row at
// dynaOpTable31[opcode]. Unlisted indices keep the FallBackToInterpreter default.
static GekkoOPTemplate table31[] =
{
{266, &JitArm::arith}, // addx
{778, &JitArm::arith}, // addox
{10, &JitArm::arith}, // addcx
{522, &JitArm::arith}, // addcox
{138, &JitArm::addex}, // addex
{650, &JitArm::addex}, // addeox
{234, &JitArm::FallBackToInterpreter}, // addmex
{746, &JitArm::FallBackToInterpreter}, // addmeox
{202, &JitArm::FallBackToInterpreter}, // addzex
{714, &JitArm::FallBackToInterpreter}, // addzeox
{491, &JitArm::FallBackToInterpreter}, // divwx
{1003, &JitArm::FallBackToInterpreter}, // divwox
{459, &JitArm::FallBackToInterpreter}, // divwux
{971, &JitArm::FallBackToInterpreter}, // divwuox
{75, &JitArm::FallBackToInterpreter}, // mulhwx
{11, &JitArm::mulhwux}, // mulhwux
{235, &JitArm::arith}, // mullwx
{747, &JitArm::arith}, // mullwox
{104, &JitArm::negx}, // negx
{616, &JitArm::negx}, // negox
{40, &JitArm::arith}, // subfx
{552, &JitArm::arith}, // subfox
{8, &JitArm::FallBackToInterpreter}, // subfcx
{520, &JitArm::FallBackToInterpreter}, // subfcox
{136, &JitArm::FallBackToInterpreter}, // subfex
{648, &JitArm::FallBackToInterpreter}, // subfeox
{232, &JitArm::FallBackToInterpreter}, // subfmex
{744, &JitArm::FallBackToInterpreter}, // subfmeox
{200, &JitArm::FallBackToInterpreter}, // subfzex
{712, &JitArm::FallBackToInterpreter}, // subfzeox
{28, &JitArm::arith}, // andx
{60, &JitArm::arith}, // andcx
{444, &JitArm::arith}, // orx
{124, &JitArm::arith}, // norx
{316, &JitArm::arith}, // xorx
{412, &JitArm::arith}, // orcx
{476, &JitArm::arith}, // nandx
{284, &JitArm::arith}, // eqvx
{0, &JitArm::cmp}, // cmp
{32, &JitArm::cmpl}, // cmpl
{26, &JitArm::cntlzwx}, // cntlzwx
{922, &JitArm::extshx}, // extshx
{954, &JitArm::extsbx}, // extsbx
{536, &JitArm::arith}, // srwx
{792, &JitArm::arith}, // srawx
{824, &JitArm::srawix}, // srawix
{24, &JitArm::arith}, // slwx
{54, &JitArm::dcbst}, // dcbst
{86, &JitArm::FallBackToInterpreter}, // dcbf
{246, &JitArm::DoNothing}, // dcbtst
{278, &JitArm::DoNothing}, // dcbt
{470, &JitArm::FallBackToInterpreter}, // dcbi
{758, &JitArm::DoNothing}, // dcba
{1014, &JitArm::FallBackToInterpreter}, // dcbz
//load word
{23, &JitArm::lXX}, // lwzx
{55, &JitArm::FallBackToInterpreter}, // lwzux
//load halfword
{279, &JitArm::lXX}, // lhzx
{311, &JitArm::lXX}, // lhzux
//load halfword signextend
{343, &JitArm::lXX}, // lhax
{375, &JitArm::lXX}, // lhaux
//load byte
{87, &JitArm::lXX}, // lbzx
{119, &JitArm::lXX}, // lbzux
//load byte reverse
{534, &JitArm::lXX}, // lwbrx
{790, &JitArm::lXX}, // lhbrx
// Conditional load/store (Wii SMP)
{150, &JitArm::FallBackToInterpreter}, // stwcxd
{20, &JitArm::FallBackToInterpreter}, // lwarx
//load string (interpret these)
{533, &JitArm::FallBackToInterpreter}, // lswx
{597, &JitArm::FallBackToInterpreter}, // lswi
//store word
{151, &JitArm::stX}, // stwx
{183, &JitArm::stX}, // stwux
//store halfword
{407, &JitArm::stX}, // sthx
{439, &JitArm::stX}, // sthux
//store byte
{215, &JitArm::stX}, // stbx
{247, &JitArm::stX}, // stbux
//store bytereverse
{662, &JitArm::FallBackToInterpreter}, // stwbrx
{918, &JitArm::FallBackToInterpreter}, // sthbrx
{661, &JitArm::FallBackToInterpreter}, // stswx
{725, &JitArm::FallBackToInterpreter}, // stswi
// fp load/store
{535, &JitArm::lfXX}, // lfsx
{567, &JitArm::lfXX}, // lfsux
{599, &JitArm::lfXX}, // lfdx
{631, &JitArm::lfXX}, // lfdux
{663, &JitArm::stfXX}, // stfsx
{695, &JitArm::stfXX}, // stfsux
{727, &JitArm::stfXX}, // stfdx
{759, &JitArm::stfXX}, // stfdux
{983, &JitArm::FallBackToInterpreter}, // stfiwx
{19, &JitArm::FallBackToInterpreter}, // mfcr
{83, &JitArm::mfmsr}, // mfmsr
{144, &JitArm::FallBackToInterpreter}, // mtcrf
{146, &JitArm::mtmsr}, // mtmsr
{210, &JitArm::mtsr}, // mtsr
{242, &JitArm::FallBackToInterpreter}, // mtsrin
{339, &JitArm::mfspr}, // mfspr
{467, &JitArm::mtspr}, // mtspr
{371, &JitArm::mftb}, // mftb
{512, &JitArm::FallBackToInterpreter}, // mcrxr
{595, &JitArm::mfsr}, // mfsr
{659, &JitArm::FallBackToInterpreter}, // mfsrin
{4, &JitArm::twx}, // tw
{598, &JitArm::DoNothing}, // sync
{982, &JitArm::icbi}, // icbi
// Unused instructions on GC
{310, &JitArm::FallBackToInterpreter}, // eciwx
{438, &JitArm::FallBackToInterpreter}, // ecowx
{854, &JitArm::DoNothing}, // eieio
{306, &JitArm::FallBackToInterpreter}, // tlbie
{370, &JitArm::FallBackToInterpreter}, // tlbia
{566, &JitArm::DoNothing}, // tlbsync
};
// Entries for dynaOpTable59, which DynaRunTable59 indexes by SUBOP5.
// The fsqrtsx row is commented out, so index 22 keeps the
// FallBackToInterpreter default written by InitTables().
static GekkoOPTemplate table59[] =
{
{18, &JitArm::FallBackToInterpreter}, // fdivsx
{20, &JitArm::fsubsx}, // fsubsx
{21, &JitArm::faddsx}, // faddsx
// {22, &JitArm::FallBackToInterpreter}, // fsqrtsx
{24, &JitArm::fresx}, // fresx
{25, &JitArm::fmulsx}, // fmulsx
{28, &JitArm::FallBackToInterpreter}, // fmsubsx
{29, &JitArm::fmaddsx}, // fmaddsx
{30, &JitArm::FallBackToInterpreter}, // fnmsubsx
{31, &JitArm::fnmaddsx}, // fnmaddsx
};
// Exact-index entries for dynaOpTable63, which DynaRunTable63 indexes by
// SUBOP10. InitTables() installs these before the table63_2 fill.
static GekkoOPTemplate table63[] =
{
{264, &JitArm::fabsx}, // fabsx
{32, &JitArm::FallBackToInterpreter}, // fcmpo
{0, &JitArm::FallBackToInterpreter}, // fcmpu
{14, &JitArm::fctiwx}, // fctiwx
{15, &JitArm::fctiwzx}, // fctiwzx
{72, &JitArm::fmrx}, // fmrx
{136, &JitArm::fnabsx}, // fnabsx
{40, &JitArm::fnegx}, // fnegx
{12, &JitArm::FallBackToInterpreter}, // frspx
{64, &JitArm::FallBackToInterpreter}, // mcrfs
{583, &JitArm::FallBackToInterpreter}, // mffsx
{70, &JitArm::FallBackToInterpreter}, // mtfsb0x
{38, &JitArm::FallBackToInterpreter}, // mtfsb1x
{134, &JitArm::FallBackToInterpreter}, // mtfsfix
{711, &JitArm::FallBackToInterpreter}, // mtfsfx
};
// Entries for dynaOpTable63 keyed by the low 5 bits of the index: InitTables()
// copies every row into all 32 slots of the form (i << 5) + opcode, after the
// table63 rows have been installed.
static GekkoOPTemplate table63_2[] =
{
{18, &JitArm::FallBackToInterpreter}, // fdivx
{20, &JitArm::fsubx}, // fsubx
{21, &JitArm::faddx}, // faddx
{22, &JitArm::FallBackToInterpreter}, // fsqrtx
{23, &JitArm::fselx}, // fselx
{25, &JitArm::fmulx}, // fmulx
{26, &JitArm::frsqrtex}, // frsqrtex
{28, &JitArm::FallBackToInterpreter}, // fmsubx
{29, &JitArm::fmaddx}, // fmaddx
{30, &JitArm::FallBackToInterpreter}, // fnmsubx
{31, &JitArm::fnmaddx}, // fnmaddx
};
namespace JitArmTables
{
void CompileInstruction(PPCAnalyst::CodeOp & op)
{
JitArm *jitarm = (JitArm *)jit;
(jitarm->*dynaOpTable[op.inst.OPCD])(op.inst);
GekkoOPInfo *info = op.opinfo;
if (info)
{
#ifdef OPLOG
if (!strcmp(info->opname, OP_TO_LOG)) // "mcrfs"
{
rsplocations.push_back(jit.js.compilerPC);
}
#endif
info->compileCount++;
info->lastUse = jit->js.compilerPC;
}
}
void InitTables()
{
// once initialized, tables are read-only
static bool initialized = false;
if (initialized)
return;
//clear
for (auto& tpl : dynaOpTable)
{
tpl = &JitArm::FallBackToInterpreter;
}
for (int i = 0; i < 32; i++)
{
dynaOpTable59[i] = &JitArm::FallBackToInterpreter;
}
for (int i = 0; i < 1024; i++)
{
dynaOpTable4 [i] = &JitArm::FallBackToInterpreter;
dynaOpTable19[i] = &JitArm::FallBackToInterpreter;
dynaOpTable31[i] = &JitArm::FallBackToInterpreter;
dynaOpTable63[i] = &JitArm::FallBackToInterpreter;
}
for (int i = 0; i < (int)(sizeof(primarytable) / sizeof(GekkoOPTemplate)); i++)
{
dynaOpTable[primarytable[i].opcode] = primarytable[i].Inst;
}
for (int i = 0; i < 32; i++)
{
int fill = i << 5;
for (int j = 0; j < (int)(sizeof(table4_2) / sizeof(GekkoOPTemplate)); j++)
{
int op = fill+table4_2[j].opcode;
dynaOpTable4[op] = table4_2[j].Inst;
}
}
for (int i = 0; i < 16; i++)
{
int fill = i << 6;
for (int j = 0; j < (int)(sizeof(table4_3) / sizeof(GekkoOPTemplate)); j++)
{
int op = fill+table4_3[j].opcode;
dynaOpTable4[op] = table4_3[j].Inst;
}
}
for (int i = 0; i < (int)(sizeof(table4) / sizeof(GekkoOPTemplate)); i++)
{
int op = table4[i].opcode;
dynaOpTable4[op] = table4[i].Inst;
}
for (int i = 0; i < (int)(sizeof(table31) / sizeof(GekkoOPTemplate)); i++)
{
int op = table31[i].opcode;
dynaOpTable31[op] = table31[i].Inst;
}
for (int i = 0; i < (int)(sizeof(table19) / sizeof(GekkoOPTemplate)); i++)
{
int op = table19[i].opcode;
dynaOpTable19[op] = table19[i].Inst;
}
for (int i = 0; i < (int)(sizeof(table59) / sizeof(GekkoOPTemplate)); i++)
{
int op = table59[i].opcode;
dynaOpTable59[op] = table59[i].Inst;
}
for (int i = 0; i < (int)(sizeof(table63) / sizeof(GekkoOPTemplate)); i++)
{
int op = table63[i].opcode;
dynaOpTable63[op] = table63[i].Inst;
}
for (int i = 0; i < 32; i++)
{
int fill = i << 5;
for (int j = 0; j < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); j++)
{
int op = fill + table63_2[j].opcode;
dynaOpTable63[op] = table63_2[j].Inst;
}
}
initialized = true;
}
} // namespace

View File

@ -1,14 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCTables.h"
// Opcode dispatch entry points for the ARM JIT.
namespace JitArmTables
{
// Dispatches op.inst to the JIT handler registered for its primary opcode.
void CompileInstruction(PPCAnalyst::CodeOp & op);
// Fills the dispatch tables; must run before CompileInstruction is used.
void InitTables();
}

View File

@ -1,659 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/ArmEmitter.h"
#include "Common/MemoryUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitAsm.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
using namespace ArmGen;
//TODO - make an option
//#if _DEBUG
// bool enableDebug = false;
//#else
// bool enableDebug = false;
//#endif
JitArmAsmRoutineManager asm_routines;
// Packs the low byte of val1 (upper half) and the low byte of val2 (lower
// half) into a single 16-bit value and hands it to PowerPC::Write_U16 at addr.
static void WriteDual8(u32 val1, u32 val2, u32 addr)
{
	const u16 upper = static_cast<u16>(static_cast<u8>(val1));
	const u16 lower = static_cast<u16>(static_cast<u8>(val2));
	PowerPC::Write_U16((upper << 8) | lower, addr);
}
// Packs the low 16 bits of val1 (upper half) and the low 16 bits of val2
// (lower half) into one 32-bit value and hands it to PowerPC::Write_U32 at addr.
static void WriteDual16(u32 val1, u32 val2, u32 addr)
{
	const u32 upper = static_cast<u32>(static_cast<u16>(val1));
	const u32 lower = static_cast<u32>(static_cast<u16>(val2));
	PowerPC::Write_U32((upper << 16) | lower, addr);
}
// Combines val1 (upper 32 bits) and val2 (lower 32 bits) into one 64-bit
// value and hands it to PowerPC::Write_U64 at addr.
static void WriteDual32(u32 val1, u32 val2, u32 addr)
{
	const u64 upper = static_cast<u64>(val1);
	const u64 lower = static_cast<u64>(val2);
	PowerPC::Write_U64((upper << 32) | lower, addr);
}
// Emits the JIT entry/dispatcher code and records its entry points
// (enterCode, dispatcher, dispatcherNoCheck, doTiming), then emits the shared
// helpers via GenerateCommon() and flushes the instruction cache.
// Register roles set up here: R9 = &PowerPC::ppcState.spr[0],
// R8 = Memory::physical_base.
void JitArmAsmRoutineManager::Generate()
{
enterCode = GetCodePtr();
// Prologue: save R4-R11 and LR; the matching POP at the end loads PC to return.
PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR);
// Take care to 8-byte align stack for function calls.
// We are misaligned here because of an odd number of args for PUSH.
// It's not like x86 where you need to account for an extra 4 bytes
// consumed by CALL.
SUB(_SP, _SP, 4);
MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
MOVI2R(R8, (u32)Memory::physical_base);
// The first entry skips the downcount/stepping checks below.
FixupBranch skipToRealDispatcher = B();
dispatcher = GetCodePtr();
// NOTE(review): unconditional printf on every Generate() call; looks like leftover debug output.
printf("Dispatcher is %p\n", dispatcher);
// Downcount Check
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
FixupBranch bail = B_CC(CC_MI);
// Only assigned and only consumed when bEnableDebugging is set (see SetJumpTarget(dbg_exit) below).
FixupBranch dbg_exit;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{
MOVI2R(R0, (u32)PowerPC::GetStatePtr());
LDR(R0, R0);
TST(R0, PowerPC::CPU_STEPPING);
FixupBranch not_stepping = B_CC(CC_EQ);
// XXX: Check for breakpoints
dbg_exit = B();
SetJumpTarget(not_stepping);
}
SetJumpTarget(skipToRealDispatcher);
dispatcherNoCheck = GetCodePtr();
// This block of code gets the address of the compiled block of code
// It runs though to the compiling portion if it isn't found
LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12
Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK
BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here.
MOVI2R(R14, (u32)jit->GetBlockCache()->iCache.data());
LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here
// R12 Confirmed this is the correct iCache Location loaded.
TST(R12, 0x80); // Test to see if it is a JIT block.
FixupBranch no_block = B_CC(CC_NEQ);
// Success, it is our Jitblock.
MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers());
// LDR R14 right here to get CodePointers()[0] pointer.
LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size
LDR(R14, R14, R12); // Load the block address in to R14
B(R14);
// No need to jump anywhere after here, the block will go back to dispatcher start
SetJumpTarget(no_block);
// If we get to this point, that means that we don't have the block cached to execute
// So call ArmJit to compile the block and then execute it.
MOVI2R(R14, (u32)&Jit);
BL(R14);
B(dispatcherNoCheck);
SetJumpTarget(bail);
doTiming = GetCodePtr();
// XXX: In JIT64, Advance() gets called /after/ the exception checking
// once it jumps back to the start of outerLoop
QuickCallFunction(R14, (void*)&CoreTiming::Advance);
// Does exception checking
// pc is copied to npc before the call and npc back to pc after it.
LDR(R0, R9, PPCSTATE_OFF(pc));
STR(R0, R9, PPCSTATE_OFF(npc));
QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions);
LDR(R0, R9, PPCSTATE_OFF(npc));
STR(R0, R9, PPCSTATE_OFF(pc));
// Check the state pointer to see if we are exiting
// Gets checked on every exception check
MOVI2R(R0, (u32)PowerPC::GetStatePtr());
MVN(R1, 0);
LDR(R0, R0);
// R1 is all ones, so this tests whether the loaded state word is non-zero.
TST(R0, R1);
FixupBranch Exit = B_CC(CC_NEQ);
B(dispatcher);
SetJumpTarget(Exit);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
SetJumpTarget(dbg_exit);
// Let the waiting thread know we are done leaving
MOVI2R(R0, (u32)&PowerPC::FinishStateMove);
BL(R0);
// Epilogue: undo the alignment adjustment and restore the saved registers.
ADD(_SP, _SP, 4);
POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns
GenerateCommon();
FlushIcache();
}
// Emits the quantized paired-single load and store helper routines, fills the
// 16-entry pairedLoadQuantized / pairedStoreQuantized pointer tables that live
// inside the code region, and emits the m_increment_profile_counter routine.
// Each helper returns by moving LR into PC (or by popping PC on the paths that
// pushed LR).
void JitArmAsmRoutineManager::GenerateCommon()
{
// R14 is LR
// R12 is scratch
// R11 is scale
// R10 is the address
Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK)
Operand2 arghmask(3, 3); // 0x0C000000
NEONXEmitter nemit(this);
// Loads: the result is left in S0 (and S1 for the "Two" variants) / D0.
const u8* loadPairedIllegal = GetCodePtr();
BKPT(0x10);
const u8* loadPairedFloatTwo = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
nemit.VLD1(I_32, D0, R10);
nemit.VREV32(I_8, D0, D0);
MOV(_PC, _LR);
}
// Emits the same instruction sequence as loadPairedFloatTwo.
const u8* loadPairedFloatOne = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
nemit.VLD1(I_32, D0, R10);
nemit.VREV32(I_8, D0, D0);
MOV(_PC, _LR);
}
// Integer variants: load the raw value(s), convert to float, then multiply by
// the float read from m_dequantizeTableS + R11.
const u8* loadPairedU8Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRB(R12, R10);
VMOV(S0, R12);
LDRB(R12, R10, 1);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VCVT(S1, S1, TO_FLOAT);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedU8One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRB(R12, R10);
VMOV(S0, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
const u8* loadPairedS8Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRSB(R12, R10);
VMOV(S0, R12);
LDRSB(R12, R10, 1);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VCVT(S1, S1, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedS8One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRSB(R12, R10);
VMOV(S0, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
// 16-bit variants byte-swap each halfword with REV16 after loading it.
const u8* loadPairedU16Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
REV16(R12, R12);
VMOV(S0, R12);
LDRH(R12, R10, 2);
REV16(R12, R12);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VCVT(S1, S1, TO_FLOAT);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedU16One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
REV16(R12, R12);
VMOV(S0, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
// Signed 16-bit variants sign-extend with SXTH after the byte swap.
const u8* loadPairedS16Two = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
REV16(R12, R12);
SXTH(R12, R12);
VMOV(S0, R12);
LDRH(R12, R10, 2);
REV16(R12, R12);
SXTH(R12, R12);
VMOV(S1, R12);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VCVT(S1, S1, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
MOV(_PC, _LR);
}
const u8* loadPairedS16One = GetCodePtr();
{
BIC(R10, R10, mask);
ADD(R10, R10, R8);
LDRH(R12, R10);
MOVI2R(R10, (u32)&m_dequantizeTableS);
ADD(R10, R10, R11);
VLDR(S2, R10, 0);
REV16(R12, R12);
SXTH(R12, R12);
VMOV(S0, R12);
VCVT(S0, S0, TO_FLOAT | IS_SIGNED);
VMUL(S0, S0, S2);
MOV(_PC, _LR);
}
// The load table is stored inside the code region, 16-byte aligned.
// Entries 0-7 are the two-value routines, entries 8-15 the one-value routines.
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
pairedLoadQuantized[0] = loadPairedFloatTwo;
pairedLoadQuantized[1] = loadPairedIllegal;
pairedLoadQuantized[2] = loadPairedIllegal;
pairedLoadQuantized[3] = loadPairedIllegal;
pairedLoadQuantized[4] = loadPairedU8Two;
pairedLoadQuantized[5] = loadPairedU16Two;
pairedLoadQuantized[6] = loadPairedS8Two;
pairedLoadQuantized[7] = loadPairedS16Two;
pairedLoadQuantized[8] = loadPairedFloatOne;
pairedLoadQuantized[9] = loadPairedIllegal;
pairedLoadQuantized[10] = loadPairedIllegal;
pairedLoadQuantized[11] = loadPairedIllegal;
pairedLoadQuantized[12] = loadPairedU8One;
pairedLoadQuantized[13] = loadPairedU16One;
pairedLoadQuantized[14] = loadPairedS8One;
pairedLoadQuantized[15] = loadPairedS16One;
// Stores
const u8* storePairedIllegal = GetCodePtr();
BKPT(0x21);
const u8* storePairedFloat = GetCodePtr();
{
// Addresses with any arghmask bit set take the call-out path at 'argh';
// all others are written directly relative to R8.
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
nemit.VREV32(I_8, D0, D0);
nemit.VST1(I_32, D0, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual32);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
// The quantized paired stores always call out to a WriteDual* helper.
const u8* storePairedU8 = GetCodePtr();
{
// R10 is the addr
// R11 is the scale
// R12 is scratch
// S0, S1 is the values
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual8);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storePairedS8 = GetCodePtr();
{
// R10 is the addr
// R11 is the scale
// R12 is scratch
// S0, S1 is the values
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual8);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storePairedU16 = GetCodePtr();
{
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual16);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storePairedS16 = GetCodePtr();
{
PUSH(5, R0, R1, R2, R3, _LR);
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
VMUL(S1, S1, S2);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VCVT(S1, S1, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R0, S0);
VMOV(R1, S1);
MOV(R2, R10);
MOVI2R(R12, (u32)&WriteDual16);
BL(R12);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleIllegal = GetCodePtr();
BKPT(0x27);
const u8* storeSingleFloat = GetCodePtr();
{
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VMOV(R12, S0);
REV(R12, R12);
STR(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U32);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
// NOTE(review): in storeSingleU8/S8/U16/S16 the VCVT to integer is emitted
// after the branch to 'argh', so on the 'argh' path S0 still holds the scaled
// float when it is moved to R0 for the Write_U8/Write_U16 call. Confirm
// whether a VCVT is missing on that path.
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VMOV(R12, S0);
STRB(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U8);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleS8 = GetCodePtr();
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R12, S0);
STRB(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U8);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
VMOV(R12, S0);
REV16(R12, R12);
STRH(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U16);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
const u8* storeSingleS16 = GetCodePtr();
{
MOVI2R(R12, (u32)&m_quantizeTableS);
ADD(R12, R12, R11);
VLDR(S2, R12, 0);
VMUL(S0, S0, S2);
TST(R10, arghmask);
FixupBranch argh = B_CC(CC_NEQ);
BIC(R10, R10, mask);
ADD(R10, R10, R8);
VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED);
VMOV(R12, S0);
REV16(R12, R12);
STRH(R12, R10);
MOV(_PC, _LR);
SetJumpTarget(argh);
PUSH(5, R0, R1, R2, R3, _LR);
VMOV(R0, S0);
MOV(R1, R10);
MOVI2R(R10, (u32)&PowerPC::Write_U16);
BL(R10);
POP(5, R0, R1, R2, R3, _PC);
}
// The store table mirrors the load table layout: entries 0-7 are the paired
// routines, entries 8-15 the single-value routines.
pairedStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
pairedStoreQuantized[0] = storePairedFloat;
pairedStoreQuantized[1] = storePairedIllegal;
pairedStoreQuantized[2] = storePairedIllegal;
pairedStoreQuantized[3] = storePairedIllegal;
pairedStoreQuantized[4] = storePairedU8;
pairedStoreQuantized[5] = storePairedU16;
pairedStoreQuantized[6] = storePairedS8;
pairedStoreQuantized[7] = storePairedS16;
pairedStoreQuantized[8] = storeSingleFloat;
pairedStoreQuantized[9] = storeSingleIllegal;
pairedStoreQuantized[10] = storeSingleIllegal;
pairedStoreQuantized[11] = storeSingleIllegal;
pairedStoreQuantized[12] = storeSingleU8;
pairedStoreQuantized[13] = storeSingleU16;
pairedStoreQuantized[14] = storeSingleS8;
pairedStoreQuantized[15] = storeSingleS16;
// Profile counter update: R0 points at three consecutive 64-bit values
// (start, end, counter); the routine stores counter + (end - start) back to
// the counter slot.
m_increment_profile_counter = AlignCode16();
nemit.VLD1(I_64, D0, R0); // Start
ADD(R0, R0, 8);
nemit.VLD1(I_64, D1, R0); // End
ADD(R0, R0, 8);
nemit.VLD1(I_64, D2, R0); // Counter
nemit.VSUB(I_64, D1, D1, D0);
nemit.VADD(I_64, D2, D2, D1);
nemit.VST1(I_64, D2, R0);
MOV(_PC, _LR);
}

View File

@ -1,32 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
// Code block that holds the ARM JIT's hand-emitted assembly routines.
class JitArmAsmRoutineManager : public CommonAsmRoutinesBase, public ArmGen::ARMCodeBlock
{
private:
// Emits the dispatcher code, then calls GenerateCommon().
void Generate();
// Emits the quantized paired load/store helpers and the profile-counter routine.
void GenerateCommon();
public:
// Address of the profile-counter routine emitted by GenerateCommon().
const u8* m_increment_profile_counter;
// Allocates 8192 bytes of code space, emits all routines into it, then
// calls WriteProtect().
void Init()
{
AllocCodeSpace(8192);
Generate();
WriteProtect();
}
// Releases the code space allocated by Init().
void Shutdown()
{
FreeCodeSpace();
}
};
extern JitArmAsmRoutineManager asm_routines;

View File

@ -1,252 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitFPRCache.h"
using namespace ArmGen;
// Creates a cache with no emitter attached yet.
ArmFPRCache::ArmFPRCache()
	: emit(nullptr)
{
}
// Attaches the emitter and resets both register pools: every PPC-side slot is
// marked unassigned (PPCReg == 33) and every host-side scratch register free.
void ArmFPRCache::Init(ARMXEmitter *emitter)
{
	emit = emitter;

	// The allocation-order getters also report each pool's size through their
	// reference argument.
	ARMReg *guestPool = GetPPCAllocationOrder(NUMPPCREG);
	ARMReg *hostPool = GetAllocationOrder(NUMARMREG);

	for (u8 idx = 0; idx < NUMPPCREG; ++idx)
	{
		auto &slot = ArmCRegs[idx];
		slot.PPCReg = 33;
		slot.Reg = guestPool[idx];
		slot.LastLoad = 0;
		slot.PS1 = false;
	}

	for (u8 idx = 0; idx < NUMARMREG; ++idx)
	{
		auto &scratch = ArmRegs[idx];
		scratch.Reg = hostPool[idx];
		scratch.free = true;
	}
}
// Drops any cached state left over from the previous block without emitting
// stores: every loaded half of every FPR releases its slot and is flushed.
// (State can stay dirty because conditional branches do not clear the cache.)
void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	for (u8 fpr = 0; fpr < 32; ++fpr)
	{
		// half 0 is PS0, half 1 is PS1
		for (int half = 0; half < 2; ++half)
		{
			OpArg &entry = _regs[fpr][half];
			if (entry.GetType() == REG_NOTLOADED)
				continue;

			const u32 slot = entry.GetRegIndex();
			ArmCRegs[slot].PPCReg = 33;
			ArmCRegs[slot].LastLoad = 0;
			entry.Flush();
		}
	}
}
// Returns the host registers available for caching PPC-side FPR halves, in
// allocation order, and writes the number of entries to count.
ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
{
	// This will return us the allocation order of the registers we can use on
	// the ppc side.
	static ARMReg allocationOrder[] =
	{
		D4,  D5,  D6,  D7,  D8,  D9,  D10, D11, D12, D13,
		D14, D15, D16, D17, D18, D19, D20, D21, D22,
		D23, D24, D25, D26, D27, D28, D29, D30, D31
	};

	// Divide by the real element size; dividing by sizeof(int) is only correct
	// when ARMReg happens to be int-sized.
	count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
	return allocationOrder;
}
// Returns the host scratch registers handed out by GetReg(), in allocation
// order, and writes the number of entries to count.
ARMReg *ArmFPRCache::GetAllocationOrder(int &count)
{
	// This will return us the allocation order of the registers we can use on
	// the host side.
	static ARMReg allocationOrder[] =
	{
		D0, D1, D2, D3
	};

	// Divide by the real element size; dividing by sizeof(int) is only correct
	// when ARMReg happens to be int-sized.
	count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
	return allocationOrder;
}
// Returns the first free host scratch register, marking it locked when
// AutoLock is set. If every scratch register is locked, asserts and returns D31.
ARMReg ArmFPRCache::GetReg(bool AutoLock)
{
	for (u8 idx = 0; idx < NUMARMREG; ++idx)
	{
		auto &scratch = ArmRegs[idx];
		if (!scratch.free)
			continue;

		// Alright, this one is free
		if (AutoLock)
			scratch.free = false;
		return scratch.Reg;
	}

	// Uh Oh, we have all them locked....
	_assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb");
	return D31;
}
// Marks the scratch register V0 as free again; asserts if it was not locked.
// Registers that are not in the scratch pool are ignored.
void ArmFPRCache::Unlock(ARMReg V0)
{
	for (u8 idx = 0; idx < NUMARMREG; ++idx)
	{
		auto &scratch = ArmRegs[idx];
		if (scratch.Reg == V0)
		{
			_assert_msg_(_DYNA_REC, !scratch.free, "This register is already unlocked");
			scratch.free = true;
		}
	}
}
// Returns the index of the PPC-side slot with the largest LastLoad value (the
// first such slot on ties, slot 0 if all are zero). When increment is set,
// every slot's LastLoad is bumped by one as it is visited.
u32 ArmFPRCache::GetLeastUsedRegister(bool increment)
{
	u32 oldestAge = 0;
	u8 oldestSlot = 0;

	for (u8 idx = 0; idx < NUMPPCREG; ++idx)
	{
		auto &slot = ArmCRegs[idx];
		if (increment)
			++slot.LastLoad;

		if (slot.LastLoad > oldestAge)
		{
			oldestAge = slot.LastLoad;
			oldestSlot = idx;
		}
	}

	return oldestSlot;
}
// Looks for the first PPC-side slot that is unassigned (PPCReg == 33).
// On success stores its index in regindex and returns true; otherwise returns
// false and leaves regindex untouched.
bool ArmFPRCache::FindFreeRegister(u32 &regindex)
{
	u8 idx = 0;
	while (idx < NUMPPCREG)
	{
		if (ArmCRegs[idx].PPCReg == 33)
		{
			regindex = idx;
			return true;
		}
		++idx;
	}
	return false;
}
// Returns the host register caching one half (PS0, or PS1 when PS1 is true) of
// PPC FPR preg. A register that is already cached is returned directly.
// Otherwise a free slot is claimed, or, when none is free, the slot with the
// largest LastLoad is written back to ppcState and reused. When preLoad is
// set, the new value is loaded from ppcState into the chosen register.
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad)
{
	// Ages every slot and picks the eviction candidate up front.
	const u32 victim = GetLeastUsedRegister(true);

	OpArg &wanted = _regs[preg][PS1];
	if (wanted.GetType() != REG_NOTLOADED)
	{
		const u8 cached = wanted.GetRegIndex();
		ArmCRegs[cached].LastLoad = 0;
		return ArmCRegs[cached].Reg;
	}

	const s16 newOffset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);

	u32 slot;
	if (!FindFreeRegister(slot))
	{
		// Alright, we couldn't get a free space, dump that least used register
		slot = victim;
		const s16 oldOffset = PPCSTATE_OFF(ps) + (ArmCRegs[slot].PPCReg * 16) + (ArmCRegs[slot].PS1 ? 8 : 0);
		emit->VSTR(ArmCRegs[slot].Reg, R9, oldOffset);
		_regs[ArmCRegs[slot].PPCReg][ArmCRegs[slot].PS1].Flush();
	}

	ArmCRegs[slot].PPCReg = preg;
	ArmCRegs[slot].LastLoad = 0;
	ArmCRegs[slot].PS1 = PS1;
	wanted.LoadToReg(slot);

	if (preLoad)
		emit->VLDR(ArmCRegs[slot].Reg, R9, newOffset);

	return ArmCRegs[slot].Reg;
}
// Returns the host register caching the PS0 half of FPR preg.
ARMReg ArmFPRCache::R0(u32 preg, bool preLoad)
{
	const bool wantPS1 = false;
	return GetPPCReg(preg, wantPS1, preLoad);
}
// Returns the host register caching the PS1 half of FPR preg.
ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
{
	const bool wantPS1 = true;
	return GetPPCReg(preg, wantPS1, preLoad);
}
// Emits a store back to ppcState for every cached FPR half. With FLUSH_ALL the
// slot is also released and the cache entry flushed; any other mode leaves the
// cache state as it is.
void ArmFPRCache::Flush(FlushMode mode)
{
	for (u8 fpr = 0; fpr < 32; ++fpr)
	{
		// half 0 is PS0 (offset +0), half 1 is PS1 (offset +8)
		for (int half = 0; half < 2; ++half)
		{
			OpArg &entry = _regs[fpr][half];
			if (entry.GetType() == REG_NOTLOADED)
				continue;

			const s16 offset = PPCSTATE_OFF(ps) + (fpr * 16) + (half * 8);
			const u32 slot = entry.GetRegIndex();
			emit->VSTR(ArmCRegs[slot].Reg, R9, offset);

			if (mode == FLUSH_ALL)
			{
				ArmCRegs[slot].PPCReg = 33;
				ArmCRegs[slot].LastLoad = 0;
				entry.Flush();
			}
		}
	}
}
// Writes both cached halves of FPR preg (if loaded) back to ppcState and
// releases their slots.
void ArmFPRCache::StoreFromRegister(u32 preg)
{
	// half 0 is PS0 (offset +0), half 1 is PS1 (offset +8)
	for (int half = 0; half < 2; ++half)
	{
		OpArg &entry = _regs[preg][half];
		if (entry.GetType() == REG_NOTLOADED)
			continue;

		const s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (half * 8);
		const u32 slot = entry.GetRegIndex();
		emit->VSTR(ArmCRegs[slot].Reg, R9, offset);

		ArmCRegs[slot].PPCReg = 33;
		ArmCRegs[slot].LastLoad = 0;
		entry.Flush();
	}
}

View File

@ -1,50 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
#define ARMFPUREGS 32
// Register cache mapping PPC floating-point register halves (PS0/PS1) onto
// host registers, plus a small pool of lockable host scratch registers.
class ArmFPRCache
{
private:
OpArg _regs[32][2]; // One for each FPR reg
JRCPPC ArmCRegs[ARMFPUREGS]; // PPC-side slots; only the first NUMPPCREG are used
JRCReg ArmRegs[ARMFPUREGS]; // host scratch registers; only the first NUMARMREG are used
int NUMPPCREG; // set by Init() from GetPPCAllocationOrder()
int NUMARMREG; // set by Init() from GetAllocationOrder()
// Both getters return a static register list and write its length to count.
ArmGen::ARMReg *GetAllocationOrder(int &count);
ArmGen::ARMReg *GetPPCAllocationOrder(int &count);
// Shared implementation behind R0()/R1().
ArmGen::ARMReg GetPPCReg(u32 preg, bool PS1, bool preLoad);
// Index of the slot with the largest LastLoad; optionally ages all slots first.
u32 GetLeastUsedRegister(bool increment);
// Finds an unassigned slot; returns false if there is none.
bool FindFreeRegister(u32 &regindex);
protected:
ArmGen::ARMXEmitter *emit; // emitter used for the VLDR/VSTR the cache generates
public:
ArmFPRCache();
~ArmFPRCache() {}
void Init(ArmGen::ARMXEmitter *emitter);
// Discards cached state at the start of a block without emitting stores.
void Start(PPCAnalyst::BlockRegStats &stats);
void SetEmitter(ArmGen::ARMXEmitter *emitter) {emit = emitter;}
ArmGen::ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
// Releases a scratch register obtained from GetReg().
void Unlock(ArmGen::ARMReg V0);
// Stores every cached half back; FLUSH_ALL also releases the slots.
void Flush(FlushMode mode = FLUSH_ALL);
ArmGen::ARMReg R0(u32 preg, bool preLoad = true); // Returns a cached register
ArmGen::ARMReg R1(u32 preg, bool preLoad = true);
// Stores both halves of preg back and releases their slots.
void StoreFromRegister(u32 preg);
};

View File

@ -1,319 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitRegCache.h"
using namespace ArmGen;
// Starts with no emitter attached; Init() installs the real one.
ArmRegCache::ArmRegCache()
	: emit(nullptr)
{
}
// Attaches the emitter and resets both register tables: every guest-capable
// host register becomes unbound, every scratch register becomes free.
void ArmRegCache::Init(ARMXEmitter *emitter)
{
	emit = emitter;

	// Both calls also fill in the corresponding entry counts.
	ARMReg *guest_order = GetPPCAllocationOrder(NUMPPCREG);
	ARMReg *scratch_order = GetAllocationOrder(NUMARMREG);

	for (int i = 0; i < NUMPPCREG; ++i)
	{
		JRCPPC &slot = ArmCRegs[i];
		slot.PPCReg = 33; // 33 == not bound to any guest register
		slot.Reg = guest_order[i];
		slot.LastLoad = 0;
	}

	for (int i = 0; i < NUMARMREG; ++i)
	{
		JRCReg &scratch = ArmRegs[i];
		scratch.Reg = scratch_order[i];
		scratch.free = true;
	}
}
// Resets the cache at the start of a block without emitting any code.
// `stats` is currently unused.
void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	// The previous block may have left bindings behind, because conditional
	// branches do not clear the register cache state. Drop all of them.
	for (OpArg &guest : regs)
	{
		if (guest.GetType() == REG_REG)
		{
			JRCPPC &host = ArmCRegs[guest.GetRegIndex()];
			host.PPCReg = 33;
			host.LastLoad = 0;
		}
		guest.Flush();
	}
}
// Returns the host registers that may be bound to guest (PPC) registers, in
// the order they should be handed out.
// `count` receives the number of entries in the returned table.
ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count)
{
	static ARMReg allocationOrder[] =
	{
		R0, R1, R2, R3, R4, R5, R6, R7
	};
	// Divide by the element size rather than sizeof(int) so the count stays
	// right regardless of the underlying type of ARMReg.
	count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
	return allocationOrder;
}
// Returns the host scratch registers available to emitted code, in the
// order GetReg() should try them.
// `count` receives the number of entries in the returned table.
ARMReg *ArmRegCache::GetAllocationOrder(int &count)
{
	static ARMReg allocationOrder[] =
	{
		R14, R12, R11, R10
	};
	// Divide by the element size rather than sizeof(int) so the count stays
	// right regardless of the underlying type of ARMReg.
	count = static_cast<int>(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
	return allocationOrder;
}
// Hands out the first free scratch register. When AutoLock is set the
// register is marked busy until Unlock() is called for it.
// If every scratch register is busy this asserts and falls back to R0.
ARMReg ArmRegCache::GetReg(bool AutoLock)
{
	for (int i = 0; i < NUMARMREG; ++i)
	{
		JRCReg &candidate = ArmRegs[i];
		if (!candidate.free)
			continue;

		if (AutoLock)
			candidate.free = false;
		return candidate.Reg;
	}

	// Nothing left to hand out.
	_assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb");
	return R0;
}
// Marks up to four scratch registers as free again. R0 is mandatory and is
// checked for double-unlock; R1..R3 are optional (INVALID_REG == not given)
// and are released without that check.
void ArmRegCache::Unlock(ARMReg R0, ARMReg R1, ARMReg R2, ARMReg R3)
{
	const ARMReg optional[] = { R1, R2, R3 };

	for (int i = 0; i < NUMARMREG; ++i)
	{
		JRCReg &host = ArmRegs[i];

		if (host.Reg == R0)
		{
			// NOTE(review): GetReg() passes _DYNA_REC_ as the tag here;
			// confirm which spelling is intended.
			_assert_msg_(_DYNA_REC, !host.free, "This register is already unlocked");
			host.free = true;
		}

		for (ARMReg extra : optional)
		{
			if (extra != INVALID_REG && host.Reg == extra)
				host.free = true;
		}
	}
}
// Returns the index of the guest-capable host register with the largest
// LastLoad counter (index 0 when every counter is zero).
// When `increment` is set, every counter is bumped by one before it is
// compared, so calling this also ages all entries.
u32 ArmRegCache::GetLeastUsedRegister(bool increment)
{
	u32 oldest_age = 0;
	u32 oldest_index = 0;

	for (int i = 0; i < NUMPPCREG; ++i)
	{
		JRCPPC &slot = ArmCRegs[i];
		if (increment)
			slot.LastLoad++;

		if (slot.LastLoad <= oldest_age)
			continue;

		oldest_age = slot.LastLoad;
		oldest_index = i;
	}

	return oldest_index;
}
// Looks for the first guest-capable host register that is not bound to any
// guest register (PPCReg == 33). On success stores its index in `regindex`
// and returns true; otherwise returns false and leaves `regindex` untouched.
bool ArmRegCache::FindFreeRegister(u32 &regindex)
{
	int i = 0;
	while (i < NUMPPCREG && ArmCRegs[i].PPCReg != 33)
		++i;

	if (i >= NUMPPCREG)
		return false;

	regindex = i;
	return true;
}
// Returns a host register holding guest GPR `preg`, emitting whatever loads
// (and, if a register must be evicted, stores) are needed to get it there.
// Immediates are materialised into a register.
ARMReg ArmRegCache::R(u32 preg)
{
	OpArg &guest = regs[preg];

	if (guest.GetType() == REG_IMM)
		return BindToRegister(preg, true, true);

	// Ages every entry and remembers the stalest one. This has to happen
	// before the cache-hit check below so hits still age the other entries.
	const u32 victim = GetLeastUsedRegister(true);

	// Already resident: just refresh its age.
	if (guest.GetType() == REG_REG)
	{
		JRCPPC &hit = ArmCRegs[guest.GetRegIndex()];
		hit.LastLoad = 0;
		return hit.Reg;
	}

	u32 target;
	if (!FindFreeRegister(target))
	{
		// No unbound register: write the stalest one back and reuse it.
		target = victim;
		JRCPPC &evicted = ArmCRegs[target];
		emit->STR(evicted.Reg, R9, PPCSTATE_OFF(gpr) + evicted.PPCReg * 4);
		regs[evicted.PPCReg].Flush();
	}

	JRCPPC &host = ArmCRegs[target];
	emit->LDR(host.Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
	host.PPCReg = preg;
	host.LastLoad = 0;
	guest.LoadToReg(target);
	return host.Reg;
}
// Public binding entry point: binds `preg` to a host register but leaves a
// guest register that currently holds an immediate as an immediate.
void ArmRegCache::BindToRegister(u32 preg, bool doLoad)
{
	const bool kill_imm = false;
	BindToRegister(preg, doLoad, kill_imm);
}
// Binds guest GPR `preg` to a host register and returns that register.
//  - doLoad:   emit code that fills the host register with the current value
//              (MOVI2R for an immediate, LDR from the PPC state otherwise).
//  - kill_imm: allow an immediate to be converted into a register binding;
//              when false and `preg` is an immediate, INVALID_REG is returned
//              and nothing is emitted.
// If no host register is unbound, the one with the largest LastLoad counter
// is written back to the PPC state and reused.
ARMReg ArmRegCache::BindToRegister(u32 preg, bool doLoad, bool kill_imm)
{
	const u32 victim = GetLeastUsedRegister(false);
	u32 spare;
	const bool have_spare = FindFreeRegister(spare);

	OpArg &guest = regs[preg];
	const RegType kind = guest.GetType();

	// Anything that is neither an immediate nor unloaded already has a host
	// register: refresh its age and hand it back.
	if (kind != REG_IMM && kind != REG_NOTLOADED)
	{
		JRCPPC &bound = ArmCRegs[guest.GetRegIndex()];
		bound.LastLoad = 0;
		return bound.Reg;
	}

	if (kind == REG_IMM && !kill_imm)
		return INVALID_REG;

	const u32 index = have_spare ? spare : victim;
	JRCPPC &host = ArmCRegs[index];

	// Evicting: the old occupant has to reach memory before it is clobbered.
	if (!have_spare)
		emit->STR(host.Reg, R9, PPCSTATE_OFF(gpr) + host.PPCReg * 4);

	if (doLoad)
	{
		if (kind == REG_IMM)
			emit->MOVI2R(host.Reg, guest.GetImm());
		else
			emit->LDR(host.Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
	}

	if (!have_spare)
		regs[host.PPCReg].Flush();

	host.PPCReg = preg;
	host.LastLoad = 0;
	guest.LoadToReg(index);
	return host.Reg;
}
// Records that guest GPR `preg` now holds the constant `imm`. No code is
// emitted; a host register that was caching `preg` is simply released
// without being written back.
void ArmRegCache::SetImmediate(u32 preg, u32 imm)
{
	OpArg &guest = regs[preg];

	if (guest.GetType() == REG_REG)
	{
		// The old register contents are dead, so drop the binding.
		JRCPPC &host = ArmCRegs[guest.GetRegIndex()];
		host.PPCReg = 33;
		host.LastLoad = 0;
	}

	guest.LoadToImm(imm);
}
// Emits stores that bring the in-memory PPC state up to date with every
// cached guest GPR.
//  - FLUSH_ALL:            additionally drops every binding afterwards.
//  - FLUSH_MAINTAIN_STATE: leaves the cache contents as they are.
void ArmRegCache::Flush(FlushMode mode)
{
	const bool drop_bindings = (mode == FLUSH_ALL);

	for (u8 a = 0; a < 32; ++a)
	{
		OpArg &guest = regs[a];

		if (guest.GetType() == REG_IMM)
		{
			if (!drop_bindings)
			{
				// Keep the immediate cached; push its value to memory
				// through a temporary scratch register.
				ARMReg scratch = GetReg();
				emit->MOVI2R(scratch, guest.GetImm());
				emit->STR(scratch, R9, PPCSTATE_OFF(gpr) + a * 4);
				Unlock(scratch);
				continue;
			}

			// Turns the entry into a REG_REG one, handled just below.
			BindToRegister(a, true, true);
		}

		if (guest.GetType() != REG_REG)
			continue;

		JRCPPC &host = ArmCRegs[guest.GetRegIndex()];
		emit->STR(host.Reg, R9, PPCSTATE_OFF(gpr) + a * 4);
		if (drop_bindings)
		{
			host.PPCReg = 33;
			host.LastLoad = 0;
			guest.Flush();
		}
	}
}
// Writes guest GPR `preg` back to the PPC state and drops its binding.
// An immediate is first materialised into a host register so that it can be
// stored like any other cached value.
void ArmRegCache::StoreFromRegister(u32 preg)
{
	OpArg &guest = regs[preg];

	if (guest.GetType() == REG_IMM)
		BindToRegister(preg, true, true); // becomes REG_REG, stored below

	if (guest.GetType() != REG_REG)
		return;

	JRCPPC &host = ArmCRegs[guest.GetRegIndex()];
	emit->STR(host.Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
	host.PPCReg = 33;
	host.LastLoad = 0;
	guest.Flush();
}

View File

@ -1,140 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/ArmEmitter.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCAnalyst.h"
// This ARM Register cache actually pre loads the most used registers before
// the block to increase speed since every memory load requires two
// instructions to load it. We are going to use R0-RMAX as registers for the
// use of PPC Registers.
// Allocation order as follows
#define ARMREGS 16
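// Allocate R0 to R7 for PPC registers first (see GetPPCAllocationOrder()).
// R9 is used as the base register for loads and stores to the PPC state.
// For general registers on the host side, start with R14 and work downwards.
// R13 is reserved for our stack pointer; never use it unless you save it
// first.
// That leaves R14, R12, R11 and R10 for use as instruction scratch registers.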
// Describes where the current value of a cached guest register lives.
enum RegType
{
	REG_NOTLOADED = 0, // Not bound to any host register
	REG_REG       = 1, // Held in a host register
	REG_IMM       = 2, // Known constant, tracked as an immediate
	REG_AWAY      = 3, // Bound to a host register, but not preloaded
};
// Selects what a register cache Flush() does after writing values back.
enum FlushMode
{
	FLUSH_ALL            = 0, // Write back and drop every binding
	FLUSH_MAINTAIN_STATE = 1, // Write back but keep the cache state
};
class OpArg
{
private:
RegType m_type; // store type
u8 m_reg; // index to register
u32 m_value; // IMM value
public:
OpArg()
{
m_type = REG_NOTLOADED;
m_reg = 33;
m_value = 0;
}
RegType GetType()
{
return m_type;
}
u8 GetRegIndex()
{
return m_reg;
}
u32 GetImm()
{
return m_value;
}
void LoadToAway(u8 reg)
{
m_type = REG_AWAY;
m_reg = reg;
}
void LoadToReg(u8 reg)
{
m_type = REG_REG;
m_reg = reg;
}
void LoadToImm(u32 imm)
{
m_type = REG_IMM;
m_value = imm;
}
void Flush()
{
m_type = REG_NOTLOADED;
}
};
// Book-keeping for one host register that can be bound to a guest (PPC)
// register.
struct JRCPPC
{
u32 PPCReg; // Tied to which PPC Register; the caches store 33 here while the slot is unbound
// NOTE(review): not referenced by ArmRegCache; presumably selects the second
// paired-single half in the FPR cache - confirm.
bool PS1;
ArmGen::ARMReg Reg; // Tied to which ARM Register
u32 LastLoad; // Age counter: reset to 0 on use, bumped by GetLeastUsedRegister(true)
};
// Book-keeping for one host scratch register handed out by GetReg().
struct JRCReg
{
ArmGen::ARMReg Reg; // Which reg this is.
bool free; // true while available; cleared by GetReg(AutoLock), set again by Unlock()
};
// Register cache for the 32 guest general purpose registers.
// Maps guest registers onto a small set of host registers, tracks guest
// registers whose value is a known constant, and emits the loads and stores
// needed to keep the in-memory PPC state consistent.
class ArmRegCache
{
private:
// Cache state of each guest GPR.
OpArg regs[32];
// Host registers that can hold guest GPRs; only the first NUMPPCREG
// entries are initialised and used.
JRCPPC ArmCRegs[ARMREGS];
JRCReg ArmRegs[ARMREGS]; // Host scratch registers; only the first NUMARMREG entries are used
// Entry counts, filled in by Init() from the allocation order tables.
int NUMPPCREG;
int NUMARMREG;
// Both return a static table and write its length to `count`.
ArmGen::ARMReg *GetAllocationOrder(int &count);
ArmGen::ARMReg *GetPPCAllocationOrder(int &count);
// Index of the ArmCRegs entry with the largest LastLoad; optionally ages
// every entry first.
u32 GetLeastUsedRegister(bool increment);
// Finds an unbound ArmCRegs entry; returns false if there is none.
bool FindFreeRegister(u32 &regindex);
// Private function can kill immediates
ArmGen::ARMReg BindToRegister(u32 preg, bool doLoad, bool kill_imm);
protected:
// Emitter that receives the generated loads and stores.
ArmGen::ARMXEmitter *emit;
public:
ArmRegCache();
~ArmRegCache() {}
void Init(ArmGen::ARMXEmitter *emitter);
// Drops all bindings without emitting code; call at the start of a block.
void Start(PPCAnalyst::BlockRegStats &stats);
ArmGen::ARMReg GetReg(bool AutoLock = true); // Return an ARM register we can use.
// Releases scratch registers obtained from GetReg(); R1..R3 are optional.
void Unlock(ArmGen::ARMReg R0, ArmGen::ARMReg R1 = ArmGen::INVALID_REG, ArmGen::ARMReg R2 = ArmGen::INVALID_REG, ArmGen::ARMReg R3 = ArmGen::INVALID_REG);
void Flush(FlushMode mode = FLUSH_ALL);
ArmGen::ARMReg R(u32 preg); // Returns a cached register
// True while `preg` is tracked as a known constant.
bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; }
// Constant last recorded for `preg`; only meaningful when IsImm(preg).
u32 GetImm(u32 preg) { return regs[preg].GetImm(); }
void SetImmediate(u32 preg, u32 imm);
// Public function doesn't kill immediates
// In reality when you call R(u32) it'll bind an immediate there
void BindToRegister(u32 preg, bool doLoad = true);
// Writes `preg` back to the PPC state and drops its binding.
void StoreFromRegister(u32 preg);
};

View File

@ -27,11 +27,6 @@
#include "Core/PowerPC/Jit64IL/JitIL_Tables.h"
#endif
#if _M_ARM_32
#include "Core/PowerPC/JitArm32/Jit.h"
#include "Core/PowerPC/JitArm32/JitArm_Tables.h"
#endif
#if _M_ARM_64
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_Tables.h"
@ -63,11 +58,6 @@ namespace JitInterface
ptr = new JitIL();
break;
#endif
#if _M_ARM_32
case PowerPC::CORE_JITARM:
ptr = new JitArm();
break;
#endif
#if _M_ARM_64
case PowerPC::CORE_JITARM64:
ptr = new JitArm64();
@ -94,11 +84,6 @@ namespace JitInterface
JitILTables::InitTables();
break;
#endif
#if _M_ARM_32
case PowerPC::CORE_JITARM:
JitArmTables::InitTables();
break;
#endif
#if _M_ARM_64
case PowerPC::CORE_JITARM64:
JitArm64Tables::InitTables();