mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-23 06:09:50 -06:00
JIT: Clean up float loads and stores.
Less code is good, and this should make future changes to memory handling easier.
This commit is contained in:
@ -1286,9 +1286,7 @@ void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
|
void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
|
||||||
if (arg.IsSimpleReg())
|
if (src > 7 || arg.IsSimpleReg())
|
||||||
PanicAlert("Emitter: MOVQ_xmm doesn't support single registers as destination");
|
|
||||||
if (src > 7)
|
|
||||||
{
|
{
|
||||||
// Alternate encoding
|
// Alternate encoding
|
||||||
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
|
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
|
||||||
|
@ -88,7 +88,7 @@ static GekkoOPTemplate primarytable[] =
|
|||||||
{51, &Jit64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
|
{51, &Jit64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
|
||||||
|
|
||||||
{52, &Jit64::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
|
{52, &Jit64::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
|
||||||
{53, &Jit64::stfs}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
{53, &Jit64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||||
{54, &Jit64::stfd}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
{54, &Jit64::stfd}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
||||||
{55, &Jit64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
{55, &Jit64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||||
|
|
||||||
|
@ -2,9 +2,6 @@
|
|||||||
// Licensed under GPLv2
|
// Licensed under GPLv2
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
|
|
||||||
// Should give a very noticeable speed boost to paired single heavy code.
|
|
||||||
|
|
||||||
#include "Common/Common.h"
|
#include "Common/Common.h"
|
||||||
#include "Common/CPUDetect.h"
|
#include "Common/CPUDetect.h"
|
||||||
|
|
||||||
@ -12,20 +9,8 @@
|
|||||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
// pshufb todo: MOVQ
|
|
||||||
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
|
||||||
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
|
|
||||||
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
|
|
||||||
|
|
||||||
u64 GC_ALIGNED16(temp64);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
|
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
|
||||||
// and pshufb could help a lot.
|
// and pshufb could help a lot.
|
||||||
// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.
|
|
||||||
|
|
||||||
void Jit64::lfs(UGeckoInstruction inst)
|
void Jit64::lfs(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
@ -40,12 +25,11 @@ void Jit64::lfs(UGeckoInstruction inst)
|
|||||||
|
|
||||||
SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false);
|
SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false);
|
||||||
|
|
||||||
MEMCHECK_START
|
|
||||||
|
|
||||||
fpr.Lock(d);
|
fpr.Lock(d);
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, js.memcheck);
|
||||||
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
|
||||||
|
|
||||||
|
MEMCHECK_START
|
||||||
|
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
|
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
@ -56,61 +40,23 @@ void Jit64::lfd(UGeckoInstruction inst)
|
|||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||||
FALLBACK_IF(js.memcheck || !inst.RA);
|
FALLBACK_IF(!inst.RA);
|
||||||
|
|
||||||
int d = inst.RD;
|
int d = inst.RD;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
|
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
|
||||||
gpr.Lock(a);
|
SafeLoadToReg(RAX, gpr.R(a), 64, offset, RegistersInUse(), false);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
|
||||||
// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
|
|
||||||
fpr.Lock(d);
|
fpr.Lock(d);
|
||||||
fpr.BindToRegister(d, true);
|
fpr.BindToRegister(d, true);
|
||||||
X64Reg xd = fpr.RX(d);
|
|
||||||
|
|
||||||
if (cpu_info.bSSSE3)
|
|
||||||
{
|
|
||||||
#if _M_X86_64
|
|
||||||
MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
|
||||||
#else
|
|
||||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
|
|
||||||
#endif
|
|
||||||
PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
|
|
||||||
MOVSD(xd, R(XMM0));
|
|
||||||
} else {
|
|
||||||
#if _M_X86_64
|
|
||||||
LoadAndSwap(64, EAX, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
|
||||||
MOV(64, M(&temp64), R(EAX));
|
|
||||||
|
|
||||||
MEMCHECK_START
|
MEMCHECK_START
|
||||||
|
MOVQ_xmm(XMM0, R(RAX));
|
||||||
MOVSD(XMM0, M(&temp64));
|
MOVSD(fpr.RX(d), R(XMM0));
|
||||||
MOVSD(xd, R(XMM0));
|
|
||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
#else
|
|
||||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
|
|
||||||
BSWAP(32, EAX);
|
|
||||||
MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));
|
|
||||||
|
|
||||||
MEMCHECK_START
|
|
||||||
|
|
||||||
MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
|
|
||||||
BSWAP(32, EAX);
|
|
||||||
MOV(32, M(&temp64), R(EAX));
|
|
||||||
MOVSD(XMM0, M(&temp64));
|
|
||||||
MOVSD(xd, R(XMM0));
|
|
||||||
|
|
||||||
MEMCHECK_END
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -119,146 +65,49 @@ void Jit64::stfd(UGeckoInstruction inst)
|
|||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||||
FALLBACK_IF(js.memcheck || !inst.RA);
|
FALLBACK_IF(!inst.RA);
|
||||||
|
|
||||||
int s = inst.RS;
|
int s = inst.RS;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
|
|
||||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
|
||||||
if (Core::g_CoreStartupParameter.bMMU ||
|
|
||||||
Core::g_CoreStartupParameter.bTLBHack) {
|
|
||||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
|
||||||
}
|
|
||||||
#ifdef ENABLE_MEM_CHECK
|
|
||||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
|
||||||
{
|
|
||||||
mem_mask |= Memory::EXRAM_MASK;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
gpr.Lock(a);
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
fpr.Lock(s);
|
|
||||||
gpr.BindToRegister(a, true, false);
|
if (fpr.R(s).IsSimpleReg())
|
||||||
|
MOVQ_xmm(R(RAX), fpr.RX(s));
|
||||||
|
else
|
||||||
|
MOV(64, R(RAX), fpr.R(s));
|
||||||
|
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
|
SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, RegistersInUse());
|
||||||
TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
|
|
||||||
FixupBranch safe = J_CC(CC_NZ);
|
|
||||||
|
|
||||||
// Fast routine
|
|
||||||
if (cpu_info.bSSSE3) {
|
|
||||||
MOVAPD(XMM0, fpr.R(s));
|
|
||||||
PSHUFB(XMM0, M((void*)bswapShuffle1x8));
|
|
||||||
#if _M_X86_64
|
|
||||||
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
|
|
||||||
#else
|
|
||||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
MOVAPD(XMM0, fpr.R(s));
|
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
|
||||||
UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
|
|
||||||
|
|
||||||
PSRLQ(XMM0, 32);
|
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
|
||||||
UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
|
|
||||||
}
|
|
||||||
FixupBranch exit = J(true);
|
|
||||||
SetJumpTarget(safe);
|
|
||||||
|
|
||||||
// Safe but slow routine
|
|
||||||
MOVAPD(XMM0, fpr.R(s));
|
|
||||||
PSRLQ(XMM0, 32);
|
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
|
||||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));
|
|
||||||
|
|
||||||
MOVAPD(XMM0, fpr.R(s));
|
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
|
||||||
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
|
|
||||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());
|
|
||||||
|
|
||||||
SetJumpTarget(exit);
|
|
||||||
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// In Release on 32bit build,
|
|
||||||
// this seemed to cause a problem with PokePark2
|
|
||||||
// at start after talking to first pokemon,
|
|
||||||
// you run and smash a box, then he goes on about
|
|
||||||
// following him and then you cant do anything.
|
|
||||||
// I have enabled interpreter for this function
|
|
||||||
// in the mean time.
|
|
||||||
// Parlane
|
|
||||||
void Jit64::stfs(UGeckoInstruction inst)
|
void Jit64::stfs(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||||
|
FALLBACK_IF(!inst.RA);
|
||||||
|
|
||||||
bool update = inst.OPCD & 1;
|
|
||||||
int s = inst.RS;
|
int s = inst.RS;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
|
|
||||||
FALLBACK_IF(!a || update);
|
|
||||||
|
|
||||||
fpr.BindToRegister(s, true, false);
|
fpr.BindToRegister(s, true, false);
|
||||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||||
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
if (gpr.R(a).IsImm())
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
{
|
SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, RegistersInUse());
|
||||||
u32 addr = (u32)(gpr.R(a).offset + offset);
|
|
||||||
if (Memory::IsRAMAddress(addr))
|
|
||||||
{
|
|
||||||
if (cpu_info.bSSSE3) {
|
|
||||||
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
|
|
||||||
WriteFloatToConstRamAddress(XMM0, addr);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (addr == 0xCC008000)
|
|
||||||
{
|
|
||||||
// Float directly to write gather pipe! Fun!
|
|
||||||
CALL((void*)asm_routines.fifoDirectWriteFloat);
|
|
||||||
// TODO
|
|
||||||
js.fifoBytesThisBlock += 4;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
|
||||||
gpr.Lock(a);
|
|
||||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
|
||||||
ADD(32, R(ABI_PARAM2), Imm32(offset));
|
|
||||||
if (update && offset)
|
|
||||||
{
|
|
||||||
// We must flush immediate values from the following register because
|
|
||||||
// it may take another value at runtime if no MMU exception has been raised
|
|
||||||
gpr.KillImmediate(a, true, true);
|
|
||||||
|
|
||||||
MEMCHECK_START
|
|
||||||
|
|
||||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
|
||||||
|
|
||||||
MEMCHECK_END
|
|
||||||
}
|
|
||||||
SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Jit64::stfsx(UGeckoInstruction inst)
|
void Jit64::stfsx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||||
|
|
||||||
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
|
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
|
MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
|
||||||
if (inst.RA)
|
if (inst.RA)
|
||||||
@ -268,14 +117,11 @@ void Jit64::stfsx(UGeckoInstruction inst)
|
|||||||
fpr.Lock(s);
|
fpr.Lock(s);
|
||||||
fpr.BindToRegister(s, true, false);
|
fpr.BindToRegister(s, true, false);
|
||||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
SafeWriteF32ToReg(XMM0, ABI_PARAM1, 0, RegistersInUse());
|
||||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
|
|
||||||
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Jit64::lfsx(UGeckoInstruction inst)
|
void Jit64::lfsx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
@ -283,30 +129,17 @@ void Jit64::lfsx(UGeckoInstruction inst)
|
|||||||
|
|
||||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||||
if (inst.RA)
|
if (inst.RA)
|
||||||
{
|
|
||||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||||
}
|
|
||||||
fpr.Lock(inst.RS);
|
|
||||||
fpr.BindToRegister(inst.RS, false);
|
|
||||||
X64Reg s = fpr.RX(inst.RS);
|
|
||||||
if (cpu_info.bSSSE3 && !js.memcheck) {
|
|
||||||
#if _M_X86_32
|
|
||||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOVD_xmm(XMM0, MDisp(EAX, (u32)Memory::base));
|
|
||||||
#else
|
|
||||||
MOVD_xmm(XMM0, MComplex(RBX, EAX, SCALE_1, 0));
|
|
||||||
#endif
|
|
||||||
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
|
|
||||||
ConvertSingleToDouble(s, XMM0);
|
|
||||||
} else {
|
|
||||||
SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false);
|
SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false);
|
||||||
|
|
||||||
|
fpr.Lock(inst.RS);
|
||||||
|
fpr.BindToRegister(inst.RS, js.memcheck);
|
||||||
|
|
||||||
MEMCHECK_START
|
MEMCHECK_START
|
||||||
|
ConvertSingleToDouble(fpr.RX(inst.RS), EAX, true);
|
||||||
ConvertSingleToDouble(s, EAX, true);
|
|
||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
}
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
fpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
}
|
||||||
|
@ -266,7 +266,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||||||
|
|
||||||
// Easy!
|
// Easy!
|
||||||
const u8* storeSingleFloat = AlignCode4();
|
const u8* storeSingleFloat = AlignCode4();
|
||||||
SafeWriteFloatToReg(XMM0, ECX, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||||
RET();
|
RET();
|
||||||
/*
|
/*
|
||||||
if (cpu_info.bSSSE3) {
|
if (cpu_info.bSSSE3) {
|
||||||
|
@ -101,7 +101,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
|
|||||||
if (accessSize == 8 && signExtend)
|
if (accessSize == 8 && signExtend)
|
||||||
MOVSX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
MOVSX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
||||||
else
|
else
|
||||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
MOVZX(64, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -110,7 +110,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
|
|||||||
if (accessSize == 8 && signExtend)
|
if (accessSize == 8 && signExtend)
|
||||||
MOVSX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
MOVSX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
||||||
else
|
else
|
||||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
MOVZX(64, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (opAddress.IsImm())
|
if (opAddress.IsImm())
|
||||||
@ -151,6 +151,10 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
|
|||||||
case 32:
|
case 32:
|
||||||
BSWAP(32, reg_value);
|
BSWAP(32, reg_value);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 64:
|
||||||
|
BSWAP(64, reg_value);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -272,6 +276,8 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Always clobbers EAX. Preserves the address.
|
||||||
|
// Preserves the value if the load fails and js.memcheck is enabled.
|
||||||
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
|
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
|
||||||
{
|
{
|
||||||
if (!jit->js.memcheck)
|
if (!jit->js.memcheck)
|
||||||
@ -325,7 +331,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||||||
{
|
{
|
||||||
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
|
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
|
||||||
}
|
}
|
||||||
else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address))
|
else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address) && accessSize != 64)
|
||||||
{
|
{
|
||||||
MMIOLoadToReg(Memory::mmio_mapping, reg_value, registersInUse,
|
MMIOLoadToReg(Memory::mmio_mapping, reg_value, registersInUse,
|
||||||
address, accessSize, signExtend);
|
address, accessSize, signExtend);
|
||||||
@ -335,6 +341,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||||||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||||
switch (accessSize)
|
switch (accessSize)
|
||||||
{
|
{
|
||||||
|
case 64: ABI_CallFunctionC((void *)&Memory::Read_U64, address); break;
|
||||||
case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break;
|
case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break;
|
||||||
case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break;
|
case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break;
|
||||||
case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break;
|
case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break;
|
||||||
@ -350,7 +357,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||||||
}
|
}
|
||||||
else if (reg_value != EAX)
|
else if (reg_value != EAX)
|
||||||
{
|
{
|
||||||
MOVZX(32, accessSize, reg_value, R(EAX));
|
MOVZX(64, accessSize, reg_value, R(EAX));
|
||||||
}
|
}
|
||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
@ -372,6 +379,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||||||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||||
switch (accessSize)
|
switch (accessSize)
|
||||||
{
|
{
|
||||||
|
case 64: ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); break;
|
||||||
case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); break;
|
case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); break;
|
||||||
case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); break;
|
case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); break;
|
||||||
case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break;
|
case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break;
|
||||||
@ -387,7 +395,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||||||
}
|
}
|
||||||
else if (reg_value != EAX)
|
else if (reg_value != EAX)
|
||||||
{
|
{
|
||||||
MOVZX(32, accessSize, reg_value, R(EAX));
|
MOVZX(64, accessSize, reg_value, R(EAX));
|
||||||
}
|
}
|
||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
@ -490,6 +498,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
|||||||
ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
|
ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
|
||||||
switch (accessSize)
|
switch (accessSize)
|
||||||
{
|
{
|
||||||
|
case 64: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); break;
|
||||||
case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break;
|
case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break;
|
||||||
case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break;
|
case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break;
|
||||||
case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break;
|
case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break;
|
||||||
@ -501,43 +510,12 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
|||||||
SetJumpTarget(exit);
|
SetJumpTarget(exit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
|
// Destroys both arg registers and EAX
|
||||||
|
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags)
|
||||||
{
|
{
|
||||||
// FIXME
|
// TODO: PSHUFB might be faster if fastmem supported MOVSS.
|
||||||
if (false && cpu_info.bSSSE3) {
|
MOVD_xmm(R(EAX), xmm_value);
|
||||||
// This path should be faster but for some reason it causes errors so I've disabled it.
|
SafeWriteRegToReg(EAX, reg_addr, 32, offset, registersInUse, flags);
|
||||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
|
||||||
|
|
||||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
|
|
||||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
|
||||||
|
|
||||||
#ifdef ENABLE_MEM_CHECK
|
|
||||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
|
||||||
mem_mask |= Memory::EXRAM_MASK;
|
|
||||||
#endif
|
|
||||||
TEST(32, R(reg_addr), Imm32(mem_mask));
|
|
||||||
FixupBranch argh = J_CC(CC_Z);
|
|
||||||
MOVSS(M(&float_buffer), xmm_value);
|
|
||||||
LoadAndSwap(32, EAX, M(&float_buffer));
|
|
||||||
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
|
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
|
||||||
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);
|
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, false);
|
|
||||||
FixupBranch arg2 = J();
|
|
||||||
SetJumpTarget(argh);
|
|
||||||
PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
|
|
||||||
#if _M_X86_64
|
|
||||||
MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
|
|
||||||
#else
|
|
||||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
|
|
||||||
#endif
|
|
||||||
SetJumpTarget(arg2);
|
|
||||||
} else {
|
|
||||||
MOVSS(M(&float_buffer), xmm_value);
|
|
||||||
MOV(32, R(EAX), M(&float_buffer));
|
|
||||||
SafeWriteRegToReg(EAX, reg_addr, 32, 0, registersInUse, flags);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
|
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
|
||||||
|
@ -47,8 +47,7 @@ public:
|
|||||||
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
|
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
|
||||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
|
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
|
||||||
|
|
||||||
// Trashes both inputs and EAX.
|
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
|
||||||
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, u32 registersInUse, int flags = 0);
|
|
||||||
|
|
||||||
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
|
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
|
||||||
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||||
|
Reference in New Issue
Block a user