mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-23 14:19:46 -06:00
@ -203,7 +203,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
|
|||||||
{
|
{
|
||||||
// Oh, RIP addressing.
|
// Oh, RIP addressing.
|
||||||
_offsetOrBaseReg = 5;
|
_offsetOrBaseReg = 5;
|
||||||
emit->WriteModRM(0, _operandReg&7, 5);
|
emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
|
||||||
//TODO : add some checks
|
//TODO : add some checks
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
|
u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
|
||||||
@ -327,7 +327,6 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// W = operand extended width (1 if 64-bit)
|
// W = operand extended width (1 if 64-bit)
|
||||||
// R = register# upper bit
|
// R = register# upper bit
|
||||||
// X = scale amnt upper bit
|
// X = scale amnt upper bit
|
||||||
@ -1390,6 +1389,10 @@ void XEmitter::PSRLQ(X64Reg reg, int shift) {
|
|||||||
Write8(shift);
|
Write8(shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PSRLQ variant whose shift count comes from a register/memory operand
// (opcode 0xd3) rather than from an immediate byte.
void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
{
	WriteSSEOp(64, 0xd3, true, reg, arg);
}
|
||||||
|
|
||||||
void XEmitter::PSLLW(X64Reg reg, int shift) {
|
void XEmitter::PSLLW(X64Reg reg, int shift) {
|
||||||
WriteSSEOp(64, 0x71, true, (X64Reg)6, R(reg));
|
WriteSSEOp(64, 0x71, true, (X64Reg)6, R(reg));
|
||||||
Write8(shift);
|
Write8(shift);
|
||||||
@ -1437,7 +1440,19 @@ void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {
|
|||||||
Write8(0x0f);
|
Write8(0x0f);
|
||||||
Write8(0x38);
|
Write8(0x38);
|
||||||
Write8(0x00);
|
Write8(0x00);
|
||||||
arg.WriteRest(this, 0);
|
arg.WriteRest(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits PTEST as the raw byte sequence 66 0F 38 17, followed by the operand
// bytes for `arg` with `dest` stored as the operand register.
// If cpu_info reports no SSE4.1 a PanicAlert is raised, but the instruction
// is still emitted afterwards.
// NOTE(review): no REX prefix is written here, so this presumably only
// encodes correctly for XMM0-XMM7 and non-extended base/index registers -
// confirm against callers.
void XEmitter::PTEST(X64Reg dest, OpArg arg)
{
	if (!cpu_info.bSSE4_1)
	{
		PanicAlert("Trying to use PTEST on a system that doesn't support it. Nobody hears your screams.");
	}

	// Operand-size prefix, then the three opcode bytes.
	Write8(0x66);
	Write8(0x0f);
	Write8(0x38);
	Write8(0x17);

	// The destination register travels in the operand's register field.
	arg.operandReg = dest;
	arg.WriteRest(this);
}
|
||||||
|
|
||||||
void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDB, true, dest, arg);}
|
void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDB, true, dest, arg);}
|
||||||
@ -1458,7 +1473,7 @@ void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDD, true, dest
|
|||||||
void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF8, true, dest, arg);}
|
void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF8, true, dest, arg);}
|
||||||
void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF9, true, dest, arg);}
|
void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF9, true, dest, arg);}
|
||||||
void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFA, true, dest, arg);}
|
void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFA, true, dest, arg);}
|
||||||
void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDB, true, dest, arg);}
|
void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFB, true, dest, arg);}
|
||||||
|
|
||||||
void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE8, true, dest, arg);}
|
void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE8, true, dest, arg);}
|
||||||
void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE9, true, dest, arg);}
|
void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE9, true, dest, arg);}
|
||||||
@ -1497,6 +1512,8 @@ void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64,
|
|||||||
void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseMUL, false, regOp1, regOp2, arg);}
|
void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseMUL, false, regOp1, regOp2, arg);}
|
||||||
void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseDIV, false, regOp1, regOp2, arg);}
|
void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseDIV, false, regOp1, regOp2, arg);}
|
||||||
void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseSQRT, false, regOp1, regOp2, arg);}
|
void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseSQRT, false, regOp1, regOp2, arg);}
|
||||||
|
void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseAND, false, regOp1, regOp2, arg);}
|
||||||
|
void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseANDN, false, regOp1, regOp2, arg);}
|
||||||
|
|
||||||
// Prefixes
|
// Prefixes
|
||||||
|
|
||||||
@ -1509,6 +1526,25 @@ void XEmitter::FWAIT()
|
|||||||
Write8(0x9B);
|
Write8(0x9B);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: make this more generic
|
||||||
|
void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, OpArg arg)
|
||||||
|
{
|
||||||
|
int mf = 0;
|
||||||
|
switch (bits) {
|
||||||
|
case 32: mf = 0; break;
|
||||||
|
case 64: mf = 2; break;
|
||||||
|
default: _assert_msg_(DYNA_REC, 0, "WriteFloatLoadStore: bits is not 32 or 64");
|
||||||
|
}
|
||||||
|
Write8(0xd9 | (mf << 1));
|
||||||
|
// x87 instructions use the reg field of the ModR/M byte as opcode:
|
||||||
|
arg.WriteRest(this, 0, (X64Reg) op);
|
||||||
|
}
|
||||||
|
|
||||||
|
void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, src);}
|
||||||
|
void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, dest);}
|
||||||
|
void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, dest);}
|
||||||
|
void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }
|
||||||
|
|
||||||
void XEmitter::RTDSC() { Write8(0x0F); Write8(0x31); }
|
void XEmitter::RTDSC() { Write8(0x0F); Write8(0x31); }
|
||||||
|
|
||||||
// helper routines for setting pointers
|
// helper routines for setting pointers
|
||||||
|
@ -100,6 +100,12 @@ enum NormalOp {
|
|||||||
nrmXCHG,
|
nrmXCHG,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// x87 load/store sub-opcodes. The numeric value is what ends up in the reg
// field of the ModR/M byte, so the gaps in the numbering are intentional.
enum FloatOp
{
	floatLD  = 0,  // load
	floatST  = 2,  // store
	floatSTP = 3,  // store and pop
};
|
||||||
|
|
||||||
class XEmitter;
|
class XEmitter;
|
||||||
|
|
||||||
// RIP addressing does not benefit from micro op fusion on Core arch
|
// RIP addressing does not benefit from micro op fusion on Core arch
|
||||||
@ -118,6 +124,7 @@ struct OpArg
|
|||||||
void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
|
void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
|
||||||
void WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, X64Reg regOp2) const;
|
void WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, X64Reg regOp2) const;
|
||||||
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const;
|
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const;
|
||||||
|
void WriteFloatModRM(XEmitter *emit, FloatOp op);
|
||||||
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
|
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
|
||||||
// This one is public - must be written to
|
// This one is public - must be written to
|
||||||
u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
|
u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
|
||||||
@ -247,6 +254,7 @@ private:
|
|||||||
void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
|
void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
|
||||||
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
|
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
|
||||||
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
|
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
|
||||||
|
void WriteFloatLoadStore(int bits, FloatOp op, OpArg arg);
|
||||||
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
|
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -427,6 +435,28 @@ public:
|
|||||||
void REP();
|
void REP();
|
||||||
void REPNE();
|
void REPNE();
|
||||||
|
|
||||||
|
// x87
|
||||||
|
// Bit masks for the fields of the x87 status word (as read by FNSTSW).
enum x87StatusWordBits
{
	// Exception flags
	x87_InvalidOperation    = 1 << 0,
	x87_DenormalizedOperand = 1 << 1,
	x87_DivisionByZero      = 1 << 2,
	x87_Overflow            = 1 << 3,
	x87_Underflow           = 1 << 4,
	x87_Precision           = 1 << 5,
	x87_StackFault          = 1 << 6,
	x87_ErrorSummary        = 1 << 7,
	// Condition codes and stack pointer
	x87_C0                  = 1 << 8,
	x87_C1                  = 1 << 9,
	x87_C2                  = 1 << 10,
	x87_TopOfStack          = 0x7 << 11,  // three-bit field, bits 11-13
	x87_C3                  = 1 << 14,
	x87_FPUBusy             = 1 << 15,
};
|
||||||
|
|
||||||
|
void FLD(int bits, OpArg src);
|
||||||
|
void FST(int bits, OpArg dest);
|
||||||
|
void FSTP(int bits, OpArg dest);
|
||||||
|
void FNSTSW_AX();
|
||||||
void FWAIT();
|
void FWAIT();
|
||||||
|
|
||||||
// SSE/SSE2: Floating point arithmetic
|
// SSE/SSE2: Floating point arithmetic
|
||||||
@ -553,6 +583,7 @@ public:
|
|||||||
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
|
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
|
||||||
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
|
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
|
||||||
|
|
||||||
|
void PTEST(X64Reg dest, OpArg arg);
|
||||||
void PAND(X64Reg dest, OpArg arg);
|
void PAND(X64Reg dest, OpArg arg);
|
||||||
void PANDN(X64Reg dest, OpArg arg);
|
void PANDN(X64Reg dest, OpArg arg);
|
||||||
void PXOR(X64Reg dest, OpArg arg);
|
void PXOR(X64Reg dest, OpArg arg);
|
||||||
@ -608,6 +639,7 @@ public:
|
|||||||
void PSRLW(X64Reg reg, int shift);
|
void PSRLW(X64Reg reg, int shift);
|
||||||
void PSRLD(X64Reg reg, int shift);
|
void PSRLD(X64Reg reg, int shift);
|
||||||
void PSRLQ(X64Reg reg, int shift);
|
void PSRLQ(X64Reg reg, int shift);
|
||||||
|
void PSRLQ(X64Reg reg, OpArg arg);
|
||||||
|
|
||||||
void PSLLW(X64Reg reg, int shift);
|
void PSLLW(X64Reg reg, int shift);
|
||||||
void PSLLD(X64Reg reg, int shift);
|
void PSLLD(X64Reg reg, int shift);
|
||||||
@ -622,6 +654,8 @@ public:
|
|||||||
void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
||||||
void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
||||||
void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
||||||
|
void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
||||||
|
void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
|
||||||
|
|
||||||
void RTDSC();
|
void RTDSC();
|
||||||
|
|
||||||
|
@ -15,11 +15,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
|
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
|
||||||
const u32 EXCEPTION_MASK = 0x1F80;
|
static const u32 EXCEPTION_MASK = 0x1F80;
|
||||||
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
|
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
|
||||||
const u32 DAZ = 0x40;
|
static const u32 DAZ = 0x40;
|
||||||
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
|
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
|
||||||
const u32 FTZ = 0x8000;
|
static const u32 FTZ = 0x8000;
|
||||||
|
|
||||||
namespace FPURoundMode
|
namespace FPURoundMode
|
||||||
{
|
{
|
||||||
@ -100,8 +100,7 @@ namespace FPURoundMode
|
|||||||
FTZ, // flush-to-zero only
|
FTZ, // flush-to-zero only
|
||||||
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
|
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
|
||||||
};
|
};
|
||||||
// FIXME: proper (?) non-IEEE mode emulation causes issues in lots of games
|
if (nonIEEEMode)
|
||||||
if (nonIEEEMode && false)
|
|
||||||
{
|
{
|
||||||
csr |= denormalLUT[cpu_info.bFlushToZero];
|
csr |= denormalLUT[cpu_info.bFlushToZero];
|
||||||
}
|
}
|
||||||
|
@ -231,3 +231,38 @@ inline u32 ConvertToSingleFTZ(u64 x)
|
|||||||
return (x >> 32) & 0x80000000;
|
return (x >> 32) & 0x80000000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Widens the bit pattern of a single-precision float to the bit pattern of a
// double without going through the host FPU, so subnormals and NaN payloads
// are carried over exactly.
//
// This is a little-endian re-implementation of the algorithm described in the
// PowerPC Programming Environments Manual for loading single precision
// floating point numbers.
// See page 566 of http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
//
//   _x      - raw 32-bit single-precision bit pattern
//   returns - raw 64-bit double-precision bit pattern
inline u64 ConvertToDouble(u32 _x)
{
	const u64 bits = _x;
	const u64 biased_exp = (bits >> 23) & 0xff;
	u64 mantissa = bits & 0x007fffff;

	if (biased_exp == 0 && mantissa != 0)
	{
		// Subnormal single: normalize the mantissa, lowering the double
		// exponent once per shift until the implicit bit appears.
		u64 double_exp = 1023 - 126;
		do
		{
			mantissa <<= 1;
			double_exp -= 1;
		} while ((mantissa & 0x00800000) == 0);

		return ((bits & 0x80000000) << 32) | (double_exp << 52) | ((mantissa & 0x007fffff) << 29);
	}

	// Every other input keeps sign, top exponent bit and the low 30 bits; only
	// the three extra exponent bits (59..61) need a fill value:
	//  - normal numbers: the inverse of the top exponent bit
	//  - zero, infinity, QNaN, SNaN: a copy of the top exponent bit
	const bool is_normal = biased_exp > 0 && biased_exp < 255;
	const u64 top_exp_bit = biased_exp >> 7;
	const u64 fill = is_normal ? !top_exp_bit : top_exp_bit;
	const u64 widened_exp = fill << 61 | fill << 60 | fill << 59;

	return ((bits & 0xc0000000) << 32) | widened_exp | ((bits & 0x3fffffff) << 29);
}
|
||||||
|
@ -92,9 +92,9 @@ void Interpreter::lfs(UGeckoInstruction _inst)
|
|||||||
u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
|
u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
|
||||||
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
||||||
{
|
{
|
||||||
double value = *(float*)&uTemp;
|
u64 value = ConvertToDouble(uTemp);
|
||||||
rPS0(_inst.FD) = value;
|
riPS0(_inst.FD) = value;
|
||||||
rPS1(_inst.FD) = value;
|
riPS1(_inst.FD) = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,9 +104,9 @@ void Interpreter::lfsu(UGeckoInstruction _inst)
|
|||||||
u32 uTemp = Memory::Read_U32(uAddress);
|
u32 uTemp = Memory::Read_U32(uAddress);
|
||||||
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
||||||
{
|
{
|
||||||
double value = *(float*)&uTemp;
|
u64 value = ConvertToDouble(uTemp);
|
||||||
rPS0(_inst.FD) = value;
|
riPS0(_inst.FD) = value;
|
||||||
rPS1(_inst.FD) = value;
|
riPS1(_inst.FD) = value;
|
||||||
m_GPR[_inst.RA] = uAddress;
|
m_GPR[_inst.RA] = uAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,9 +118,9 @@ void Interpreter::lfsux(UGeckoInstruction _inst)
|
|||||||
u32 uTemp = Memory::Read_U32(uAddress);
|
u32 uTemp = Memory::Read_U32(uAddress);
|
||||||
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
||||||
{
|
{
|
||||||
double value = *(float*)&uTemp;
|
u64 value = ConvertToDouble(uTemp);
|
||||||
rPS0(_inst.FD) = value;
|
riPS0(_inst.FD) = value;
|
||||||
rPS1(_inst.FD) = value;
|
riPS1(_inst.FD) = value;
|
||||||
m_GPR[_inst.RA] = uAddress;
|
m_GPR[_inst.RA] = uAddress;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -130,9 +130,9 @@ void Interpreter::lfsx(UGeckoInstruction _inst)
|
|||||||
u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst));
|
u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst));
|
||||||
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
|
||||||
{
|
{
|
||||||
double value = *(float*)&uTemp;
|
u64 value = ConvertToDouble(uTemp);
|
||||||
rPS0(_inst.FD) = value;
|
riPS0(_inst.FD) = value;
|
||||||
rPS1(_inst.FD) = value;
|
riPS1(_inst.FD) = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -281,9 +281,6 @@ void Interpreter::stfdu(UGeckoInstruction _inst)
|
|||||||
|
|
||||||
void Interpreter::stfs(UGeckoInstruction _inst)
|
void Interpreter::stfs(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
//double value = rPS0(_inst.FS);
|
|
||||||
//float fTemp = (float)value;
|
|
||||||
//Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst));
|
|
||||||
Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), Helper_Get_EA(_inst));
|
Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), Helper_Get_EA(_inst));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -374,7 +374,7 @@ void RegCache::Flush(FlushMode mode)
|
|||||||
{
|
{
|
||||||
if (locks[i])
|
if (locks[i])
|
||||||
{
|
{
|
||||||
PanicAlert("Someone forgot to unlock PPC reg %i.", i);
|
PanicAlert("Someone forgot to unlock PPC reg %i (X64 reg %i).", i, RX(i));
|
||||||
}
|
}
|
||||||
if (regs[i].away)
|
if (regs[i].away)
|
||||||
{
|
{
|
||||||
|
@ -12,6 +12,8 @@
|
|||||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// pshufb todo: MOVQ
|
// pshufb todo: MOVQ
|
||||||
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
@ -19,11 +21,10 @@ const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10,
|
|||||||
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
|
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
|
||||||
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
|
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
u64 GC_ALIGNED16(temp64);
|
u64 GC_ALIGNED16(temp64);
|
||||||
u32 GC_ALIGNED16(temp32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
|
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
|
||||||
// and pshufb could help a lot.
|
// and pshufb could help a lot.
|
||||||
// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.
|
// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.
|
||||||
@ -46,11 +47,9 @@ void Jit64::lfs(UGeckoInstruction inst)
|
|||||||
|
|
||||||
MEMCHECK_START
|
MEMCHECK_START
|
||||||
|
|
||||||
MOV(32, M(&temp32), R(EAX));
|
|
||||||
fpr.Lock(d);
|
fpr.Lock(d);
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, false);
|
||||||
CVTSS2SD(fpr.RX(d), M(&temp32));
|
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
||||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
|
||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
|
|
||||||
@ -226,13 +225,15 @@ void Jit64::stfs(UGeckoInstruction inst)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fpr.BindToRegister(s, true, false);
|
||||||
|
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||||
|
|
||||||
if (gpr.R(a).IsImm())
|
if (gpr.R(a).IsImm())
|
||||||
{
|
{
|
||||||
u32 addr = (u32)(gpr.R(a).offset + offset);
|
u32 addr = (u32)(gpr.R(a).offset + offset);
|
||||||
if (Memory::IsRAMAddress(addr))
|
if (Memory::IsRAMAddress(addr))
|
||||||
{
|
{
|
||||||
if (cpu_info.bSSSE3) {
|
if (cpu_info.bSSSE3) {
|
||||||
CVTSD2SS(XMM0, fpr.R(s));
|
|
||||||
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
|
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
|
||||||
WriteFloatToConstRamAddress(XMM0, addr);
|
WriteFloatToConstRamAddress(XMM0, addr);
|
||||||
return;
|
return;
|
||||||
@ -241,7 +242,6 @@ void Jit64::stfs(UGeckoInstruction inst)
|
|||||||
else if (addr == 0xCC008000)
|
else if (addr == 0xCC008000)
|
||||||
{
|
{
|
||||||
// Float directly to write gather pipe! Fun!
|
// Float directly to write gather pipe! Fun!
|
||||||
CVTSD2SS(XMM0, fpr.R(s));
|
|
||||||
CALL((void*)asm_routines.fifoDirectWriteFloat);
|
CALL((void*)asm_routines.fifoDirectWriteFloat);
|
||||||
// TODO
|
// TODO
|
||||||
js.fifoBytesThisBlock += 4;
|
js.fifoBytesThisBlock += 4;
|
||||||
@ -251,7 +251,6 @@ void Jit64::stfs(UGeckoInstruction inst)
|
|||||||
|
|
||||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
||||||
gpr.Lock(a);
|
gpr.Lock(a);
|
||||||
fpr.Lock(s);
|
|
||||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
||||||
ADD(32, R(ABI_PARAM2), Imm32(offset));
|
ADD(32, R(ABI_PARAM2), Imm32(offset));
|
||||||
if (update && offset)
|
if (update && offset)
|
||||||
@ -266,7 +265,6 @@ void Jit64::stfs(UGeckoInstruction inst)
|
|||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
}
|
}
|
||||||
CVTSD2SS(XMM0, fpr.R(s));
|
|
||||||
SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
|
SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
@ -281,11 +279,14 @@ void Jit64::stfsx(UGeckoInstruction inst)
|
|||||||
|
|
||||||
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
|
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
fpr.Lock(inst.RS);
|
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
|
MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
|
||||||
if (inst.RA)
|
if (inst.RA)
|
||||||
ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
|
ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
|
||||||
CVTSD2SS(XMM0, fpr.R(inst.RS));
|
|
||||||
|
int s = inst.RS;
|
||||||
|
fpr.Lock(s);
|
||||||
|
fpr.BindToRegister(s, true, false);
|
||||||
|
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
|
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
|
||||||
|
|
||||||
@ -304,21 +305,20 @@ void Jit64::lfsx(UGeckoInstruction inst)
|
|||||||
{
|
{
|
||||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||||
}
|
}
|
||||||
|
fpr.Lock(inst.RS);
|
||||||
|
fpr.BindToRegister(inst.RS, false);
|
||||||
|
X64Reg s = fpr.RX(inst.RS);
|
||||||
if (cpu_info.bSSSE3 && !js.memcheck) {
|
if (cpu_info.bSSSE3 && !js.memcheck) {
|
||||||
fpr.Lock(inst.RS);
|
|
||||||
fpr.BindToRegister(inst.RS, false, true);
|
|
||||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
|
||||||
#ifdef _M_IX86
|
#ifdef _M_IX86
|
||||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
MOVD_xmm(r, MDisp(EAX, (u32)Memory::base));
|
MOVD_xmm(XMM0, MDisp(EAX, (u32)Memory::base));
|
||||||
#else
|
#else
|
||||||
MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
|
MOVD_xmm(XMM0, MComplex(RBX, EAX, SCALE_1, 0));
|
||||||
#endif
|
#endif
|
||||||
MEMCHECK_START
|
MEMCHECK_START
|
||||||
|
|
||||||
PSHUFB(r, M((void *)bswapShuffle1x4));
|
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
|
||||||
CVTSS2SD(r, R(r));
|
ConvertSingleToDouble(s, XMM0);
|
||||||
MOVDDUP(r, R(r));
|
|
||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
} else {
|
} else {
|
||||||
@ -326,11 +326,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
|
|||||||
|
|
||||||
MEMCHECK_START
|
MEMCHECK_START
|
||||||
|
|
||||||
MOV(32, M(&temp32), R(EAX));
|
ConvertSingleToDouble(s, EAX, true);
|
||||||
CVTSS2SD(XMM0, M(&temp32));
|
|
||||||
fpr.Lock(inst.RS);
|
|
||||||
fpr.BindToRegister(inst.RS, false, true);
|
|
||||||
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
|
|
||||||
|
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
}
|
}
|
||||||
|
@ -1290,10 +1290,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
|||||||
}
|
}
|
||||||
case DupSingleToMReg: {
|
case DupSingleToMReg: {
|
||||||
if (!thisUsed) break;
|
if (!thisUsed) break;
|
||||||
X64Reg reg = fregURegWithoutMov(RI, I);
|
|
||||||
Jit->CVTSS2SD(reg, fregLocForInst(RI, getOp1(I)));
|
X64Reg input = fregEnsureInReg(RI, getOp1(I));
|
||||||
Jit->MOVDDUP(reg, R(reg));
|
X64Reg output = fregURegWithoutMov(RI, I);
|
||||||
RI.fregs[reg] = I;
|
Jit->ConvertSingleToDouble(output, input);
|
||||||
|
|
||||||
|
RI.fregs[output] = I;
|
||||||
fregNormalRegClear(RI, I);
|
fregNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1414,9 +1416,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
|||||||
}
|
}
|
||||||
case DoubleToSingle: {
|
case DoubleToSingle: {
|
||||||
if (!thisUsed) break;
|
if (!thisUsed) break;
|
||||||
X64Reg reg = fregURegWithoutMov(RI, I);
|
|
||||||
Jit->CVTSD2SS(reg, fregLocForInst(RI, getOp1(I)));
|
X64Reg input = fregEnsureInReg(RI, getOp1(I));
|
||||||
RI.fregs[reg] = I;
|
X64Reg output = fregURegWithoutMov(RI, I);
|
||||||
|
Jit->ConvertDoubleToSingle(output, input);
|
||||||
|
|
||||||
|
RI.fregs[output] = I;
|
||||||
fregNormalRegClear(RI, I);
|
fregNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -416,6 +416,200 @@ void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u32 GC_ALIGNED16(temp32);
|
||||||
|
static u64 GC_ALIGNED16(temp64);
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <intrin.h>
|
||||||
|
#ifdef _M_X64
|
||||||
|
static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0, 0x0000000000400000);
|
||||||
|
static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi64x(0, 0x000000007f800000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi64x(0, 0x0008000000000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi64x(0, 0x7ff0000000000000);
|
||||||
|
#else
|
||||||
|
static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi32(0, 0, 0x00000000, 0x00400000);
|
||||||
|
static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi32(0, 0, 0x00000000, 0x7f800000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi32(0, 0, 0x00080000, 0x00000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi32(0, 0, 0x7ff00000, 0x00000000);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
static const __uint128_t GC_ALIGNED16(single_qnan_bit) = 0x0000000000400000;
|
||||||
|
static const __uint128_t GC_ALIGNED16(single_exponent) = 0x000000007f800000;
|
||||||
|
static const __uint128_t GC_ALIGNED16(double_qnan_bit) = 0x0008000000000000;
|
||||||
|
static const __uint128_t GC_ALIGNED16(double_exponent) = 0x7ff0000000000000;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Since the following float conversion functions are used in non-arithmetic PPC float instructions,
|
||||||
|
// they must convert floats bitexact and never flush denormals to zero or turn SNaNs into QNaNs.
|
||||||
|
// This means we can't use CVTSS2SD/CVTSD2SS :(
|
||||||
|
// The x87 FPU doesn't even support flush-to-zero so we can use FLD+FSTP even on denormals.
|
||||||
|
// If the number is a NaN, make sure to set the QNaN bit back to its original value.
|
||||||
|
|
||||||
|
// Another problem is that officially, converting doubles that lie outside the single-precision range to single format results in undefined behavior.
|
||||||
|
// Relying on undefined behavior is a bug so no software should ever do this.
|
||||||
|
// In case it does happen, phire's more accurate implementation of ConvertDoubleToSingle() is reproduced below.
|
||||||
|
|
||||||
|
//#define MORE_ACCURATE_DOUBLETOSINGLE
|
||||||
|
#ifdef MORE_ACCURATE_DOUBLETOSINGLE
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#ifdef _M_X64
|
||||||
|
static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi64x(0, 0x000fffffffffffff);
|
||||||
|
static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0, 0x8000000000000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi64x(0, 0x0010000000000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi64x(0, 0xc000000000000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi64x(0, 0x07ffffffe0000000);
|
||||||
|
#else
|
||||||
|
static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi32(0, 0, 0x000fffff, 0xffffffff);
|
||||||
|
static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi32(0, 0, 0x80000000, 0x00000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi32(0, 0, 0x00100000, 0x00000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi32(0, 0, 0xc0000000, 0x00000000);
|
||||||
|
static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi32(0, 0, 0x07ffffff, 0xe0000000);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
static const __uint128_t GC_ALIGNED16(double_fraction) = 0x000fffffffffffff;
|
||||||
|
static const __uint128_t GC_ALIGNED16(double_sign_bit) = 0x8000000000000000;
|
||||||
|
static const __uint128_t GC_ALIGNED16(double_explicit_top_bit) = 0x0010000000000000;
|
||||||
|
static const __uint128_t GC_ALIGNED16(double_top_two_bits) = 0xc000000000000000;
|
||||||
|
static const __uint128_t GC_ALIGNED16(double_bottom_bits) = 0x07ffffffe0000000;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// This is the same algorithm used in the interpreter (and actual hardware)
|
||||||
|
// The documentation states that the conversion of a double with an outside the
|
||||||
|
// valid range for a single (or a single denormal) is undefined.
|
||||||
|
// But testing on actual hardware shows it always picks bits 0..1 and 5..34
|
||||||
|
// unless the exponent is in the range of 874 to 896.
|
||||||
|
void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
||||||
|
{
|
||||||
|
MOVSD(XMM1, R(src));
|
||||||
|
|
||||||
|
// Grab Exponent
|
||||||
|
PAND(XMM1, M((void *)&double_exponent));
|
||||||
|
PSRLQ(XMM1, 52);
|
||||||
|
MOVD_xmm(R(EAX), XMM1);
|
||||||
|
|
||||||
|
|
||||||
|
// Check if the double is in the range of valid single subnormal
|
||||||
|
CMP(16, R(EAX), Imm16(896));
|
||||||
|
FixupBranch NoDenormalize = J_CC(CC_G);
|
||||||
|
CMP(16, R(EAX), Imm16(874));
|
||||||
|
FixupBranch NoDenormalize2 = J_CC(CC_L);
|
||||||
|
|
||||||
|
// Denormalise
|
||||||
|
|
||||||
|
// shift = (905 - Exponent) plus the 21 bit double to single shift
|
||||||
|
MOV(16, R(EAX), Imm16(905 + 21));
|
||||||
|
MOVD_xmm(XMM0, R(EAX));
|
||||||
|
PSUBQ(XMM0, R(XMM1));
|
||||||
|
|
||||||
|
// xmm1 = fraction | 0x0010000000000000
|
||||||
|
MOVSD(XMM1, R(src));
|
||||||
|
PAND(XMM1, M((void *)&double_fraction));
|
||||||
|
POR(XMM1, M((void *)&double_explicit_top_bit));
|
||||||
|
|
||||||
|
// fraction >> shift
|
||||||
|
PSRLQ(XMM1, R(XMM0));
|
||||||
|
|
||||||
|
// OR the sign bit in.
|
||||||
|
MOVSD(XMM0, R(src));
|
||||||
|
PAND(XMM0, M((void *)&double_sign_bit));
|
||||||
|
PSRLQ(XMM0, 32);
|
||||||
|
POR(XMM1, R(XMM0));
|
||||||
|
|
||||||
|
FixupBranch end = J(false); // Goto end
|
||||||
|
|
||||||
|
SetJumpTarget(NoDenormalize);
|
||||||
|
SetJumpTarget(NoDenormalize2);
|
||||||
|
|
||||||
|
// Don't Denormalize
|
||||||
|
|
||||||
|
// We want bits 0, 1
|
||||||
|
MOVSD(XMM1, R(src));
|
||||||
|
PAND(XMM1, M((void *)&double_top_two_bits));
|
||||||
|
PSRLQ(XMM1, 32);
|
||||||
|
|
||||||
|
// And 5 through to 34
|
||||||
|
MOVSD(XMM0, R(src));
|
||||||
|
PAND(XMM0, M((void *)&double_bottom_bits));
|
||||||
|
PSRLQ(XMM0, 29);
|
||||||
|
|
||||||
|
// OR them togther
|
||||||
|
POR(XMM1, R(XMM0));
|
||||||
|
|
||||||
|
// End
|
||||||
|
SetJumpTarget(end);
|
||||||
|
MOVDDUP(dst, R(XMM1));
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // MORE_ACCURATE_DOUBLETOSINGLE
|
||||||
|
|
||||||
|
// Emits code that narrows the double in `src` to single precision through
// the x87 unit (FLD m64 / FSTP m32 via temp64 and temp32) and leaves the
// result, duplicated into both 64-bit halves, in `dst`.
// After the x87 round trip the quiet-NaN bit is cleared again whenever it
// was clear in the input and the fix-up path is taken.
// The emitted code overwrites XMM0 and XMM1; without SSE4.1 it also
// overwrites AX.
void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
{
	// Keep a copy of the input bits in XMM1 and push the value onto the x87 stack.
	MOVSD(M(&temp64), src);
	MOVSD(XMM1, R(src));
	FLD(64, M(&temp64));

	// Decide which condition code means "no fix-up needed".
	CCFlags skip_fixup_cond;
	if (!cpu_info.bSSE4_1)
	{
		// Fall back to the x87 invalid-operation flag raised by the load.
		// NOTE(review): the flag is not cleared beforehand in this function -
		// confirm a stale flag cannot reach this test.
		FNSTSW_AX();
		TEST(16, R(AX), Imm16(x87_InvalidOperation));
		skip_fixup_cond = CC_Z;
	}
	else
	{
		// PTEST sets CF only when every exponent bit is set in XMM1
		// (Inf/NaN), so "no carry" means an ordinary value.
		PTEST(XMM1, M((void *)&double_exponent));
		skip_fixup_cond = CC_NC;
	}

	// Pop the narrowed value and fetch it into XMM0; the conditional jump
	// below still uses the flags produced by the test above.
	FSTP(32, M(&temp32));
	MOVSS(XMM0, M(&temp32));
	FixupBranch fixup_skipped = J_CC(skip_fixup_cond);

	// XMM1 = the single-format quiet bit if the input had its quiet bit clear, else 0.
	PANDN(XMM1, M((void *)&double_qnan_bit));
	PSRLQ(XMM1, 29);

	// XMM0 &= ~XMM1
	if (cpu_info.bAVX)
	{
		VPANDN(XMM0, XMM1, R(XMM0));
	}
	else
	{
		PANDN(XMM1, R(XMM0));
		MOVSS(XMM0, R(XMM1));
	}

	SetJumpTarget(fixup_skipped);
	MOVDDUP(dst, R(XMM0));
}
|
||||||
|
#endif // MORE_ACCURATE_DOUBLETOSINGLE
|
||||||
|
|
||||||
|
// Emits code that widens a single to double precision through the x87 unit
// (FLD m32 / FSTP m64 via temp32 and temp64) and leaves the result,
// duplicated into both 64-bit halves, in `dst`.
//   src        - register holding the single-precision bits
//   src_is_gpr - true when `src` is a general purpose register, false when
//                it is an XMM register
// After the x87 round trip the quiet-NaN bit is cleared again whenever it
// was clear in the input and the fix-up path is taken.
// The emitted code overwrites XMM1; without SSE4.1 it also overwrites AX.
void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr)
{
	// Spill the input to temp32 and keep a copy of its bits in XMM1.
	if (!src_is_gpr)
	{
		MOVSS(M(&temp32), src);
		MOVSS(R(XMM1), src);
	}
	else
	{
		MOV(32, M(&temp32), R(src));
		MOVD_xmm(XMM1, R(src));
	}
	FLD(32, M(&temp32));

	// Decide which condition code means "no fix-up needed".
	CCFlags skip_fixup_cond;
	if (!cpu_info.bSSE4_1)
	{
		// Fall back to the x87 invalid-operation flag raised by the load.
		// NOTE(review): the flag is not cleared beforehand in this function -
		// confirm a stale flag cannot reach this test.
		FNSTSW_AX();
		TEST(16, R(AX), Imm16(x87_InvalidOperation));
		skip_fixup_cond = CC_Z;
	}
	else
	{
		// PTEST sets CF only when every exponent bit is set in XMM1
		// (Inf/NaN), so "no carry" means an ordinary value.
		PTEST(XMM1, M((void *)&single_exponent));
		skip_fixup_cond = CC_NC;
	}

	// Pop the widened value and fetch it into dst; the conditional jump
	// below still uses the flags produced by the test above.
	FSTP(64, M(&temp64));
	MOVSD(dst, M(&temp64));
	FixupBranch fixup_skipped = J_CC(skip_fixup_cond);

	// XMM1 = the double-format quiet bit if the input had its quiet bit clear, else 0.
	PANDN(XMM1, M((void *)&single_qnan_bit));
	PSLLQ(XMM1, 29);

	// dst &= ~XMM1
	if (cpu_info.bAVX)
	{
		VPANDN(dst, XMM1, R(dst));
	}
	else
	{
		PANDN(XMM1, R(dst));
		MOVSD(dst, R(XMM1));
	}

	SetJumpTarget(fixup_skipped);
	MOVDDUP(dst, R(dst));
}
|
||||||
|
|
||||||
void EmuCodeBlock::JitClearCA()
|
void EmuCodeBlock::JitClearCA()
|
||||||
{
|
{
|
||||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||||
|
@ -48,6 +48,10 @@ public:
|
|||||||
|
|
||||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||||
|
|
||||||
|
// AX might get trashed
|
||||||
|
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
|
||||||
|
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
|
||||||
protected:
|
protected:
|
||||||
std::unordered_map<u8 *, u32> registersInUseAtLoc;
|
std::unordered_map<u8 *, u32> registersInUseAtLoc;
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user