mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-23 22:29:39 -06:00
Merge pull request #2496 from Tilka/fma4
Jit64: add FMA4 support to fmaddXX
This commit is contained in:
@ -44,6 +44,7 @@ struct CPUInfo
|
||||
bool bBMI1;
|
||||
bool bBMI2;
|
||||
bool bFMA;
|
||||
bool bFMA4;
|
||||
bool bAES;
|
||||
// FXSAVE/FXRSTOR
|
||||
bool bFXSR;
|
||||
|
@ -175,6 +175,7 @@ void CPUInfo::Detect()
|
||||
__cpuid(cpu_id, 0x80000001);
|
||||
if (cpu_id[2] & 1) bLAHFSAHF64 = true;
|
||||
if ((cpu_id[2] >> 5) & 1) bLZCNT = true;
|
||||
if ((cpu_id[2] >> 16) & 1) bFMA4 = true;
|
||||
if ((cpu_id[3] >> 29) & 1) bLongMode = true;
|
||||
}
|
||||
|
||||
|
@ -1437,6 +1437,13 @@ void XEmitter::WriteFMA3Op(u8 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg
|
||||
WriteVEXOp(0x66, 0x3800 | op, regOp1, regOp2, arg, W);
|
||||
}
|
||||
|
||||
// Emits one FMA4 instruction through WriteVEXOp4.
//
//   op      low byte of the opcode; it is OR-ed with 0x3A00 before being
//           handed to WriteVEXOp4 (presumably selecting the 0F 3A opcode
//           map -- confirm against WriteVEXOp4).
//   dest    destination register.
//   regOp1  first register operand.
//   regOp2  second register operand; forwarded AFTER arg to WriteVEXOp4.
//   arg     the OpArg operand.
//   W       forwarded unchanged to WriteVEXOp4.
//
// If cpu_info.bFMA4 is not set a PanicAlert is raised, but the instruction
// is still emitted afterwards.
void XEmitter::WriteFMA4Op(u8 op, X64Reg dest, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int W)
{
	const bool fma4_available = cpu_info.bFMA4;
	if (!fma4_available)
	{
		PanicAlert("Trying to use FMA4 on a system that doesn't support it. Computer is v. f'n madd.");
	}

	// Note the operand order expected by WriteVEXOp4: (dest, regOp1, arg, regOp2).
	WriteVEXOp4(0x66, 0x3A00 | op, dest, regOp1, arg, regOp2, W);
}
|
||||
|
||||
void XEmitter::WriteBMIOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
|
||||
{
|
||||
CheckFlags();
|
||||
@ -1921,6 +1928,32 @@ void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {W
|
||||
// VFMSUBADD213PD: forwards its operands to WriteFMA3Op with opcode byte 0xA7 and W = 1.
void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)
{
	WriteFMA3Op(0xA7, regOp1, regOp2, arg, 1);
}
|
||||
// VFMSUBADD231PD: forwards its operands to WriteFMA3Op with opcode byte 0xB7 and W = 1.
void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)
{
	WriteFMA3Op(0xB7, regOp1, regOp2, arg, 1);
}
|
||||
|
||||
#define FMA4(name, op) \
|
||||
void XEmitter::name(X64Reg dest, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteFMA4Op(op, dest, regOp1, regOp2, arg, 1);} \
|
||||
void XEmitter::name(X64Reg dest, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteFMA4Op(op, dest, regOp1, regOp2, arg, 0);}
|
||||
|
||||
FMA4(VFMADDSUBPS, 0x5C)
|
||||
FMA4(VFMADDSUBPD, 0x5D)
|
||||
FMA4(VFMSUBADDPS, 0x5E)
|
||||
FMA4(VFMSUBADDPD, 0x5F)
|
||||
FMA4(VFMADDPS, 0x68)
|
||||
FMA4(VFMADDPD, 0x69)
|
||||
FMA4(VFMADDSS, 0x6A)
|
||||
FMA4(VFMADDSD, 0x6B)
|
||||
FMA4(VFMSUBPS, 0x6C)
|
||||
FMA4(VFMSUBPD, 0x6D)
|
||||
FMA4(VFMSUBSS, 0x6E)
|
||||
FMA4(VFMSUBSD, 0x6F)
|
||||
FMA4(VFNMADDPS, 0x78)
|
||||
FMA4(VFNMADDPD, 0x79)
|
||||
FMA4(VFNMADDSS, 0x7A)
|
||||
FMA4(VFNMADDSD, 0x7B)
|
||||
FMA4(VFNMSUBPS, 0x7C)
|
||||
FMA4(VFNMSUBPD, 0x7D)
|
||||
FMA4(VFNMSUBSS, 0x7E)
|
||||
FMA4(VFNMSUBSD, 0x7F)
|
||||
#undef FMA4
|
||||
|
||||
// SARX: forwards to WriteBMI2Op with prefix 0xF3 and opcode 0x38F7.
// The wrapper takes (regOp1, arg, regOp2) but the helper expects
// (regOp1, regOp2, arg), so the last two operands are swapped in the call.
void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2)
{
	WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);
}
|
||||
// SHLX: forwards to WriteBMI2Op with prefix 0x66 and opcode 0x38F7.
// The wrapper takes (regOp1, arg, regOp2) but the helper expects
// (regOp1, regOp2, arg), so the last two operands are swapped in the call.
void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2)
{
	WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);
}
|
||||
// SHRX: forwards to WriteBMI2Op with prefix 0xF2 and opcode 0x38F7.
// The wrapper takes (regOp1, arg, regOp2) but the helper expects
// (regOp1, regOp2, arg), so the last two operands are swapped in the call.
void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2)
{
	WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);
}
|
||||
|
@ -291,6 +291,7 @@ private:
|
||||
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int W = 0, int extrabytes = 0);
|
||||
void WriteAVXOp4(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, X64Reg regOp3, int W = 0);
|
||||
void WriteFMA3Op(u8 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int W = 0);
|
||||
// Emits an FMA4 instruction for opcode byte `op` with a destination, two
// register operands and one OpArg operand. Raises a PanicAlert when
// cpu_info.bFMA4 is not set. W defaults to 0.
void WriteFMA4Op(u8 op, X64Reg dest, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int W = 0);
|
||||
void WriteBMIOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
|
||||
void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
|
||||
void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
|
||||
@ -853,6 +854,32 @@ public:
|
||||
void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
||||
void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
||||
|
||||
#define FMA4(name) \
|
||||
void name(X64Reg dest, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); \
|
||||
void name(X64Reg dest, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
|
||||
|
||||
FMA4(VFMADDSUBPS)
|
||||
FMA4(VFMADDSUBPD)
|
||||
FMA4(VFMSUBADDPS)
|
||||
FMA4(VFMSUBADDPD)
|
||||
FMA4(VFMADDPS)
|
||||
FMA4(VFMADDPD)
|
||||
FMA4(VFMADDSS)
|
||||
FMA4(VFMADDSD)
|
||||
FMA4(VFMSUBPS)
|
||||
FMA4(VFMSUBPD)
|
||||
FMA4(VFMSUBSS)
|
||||
FMA4(VFMSUBSD)
|
||||
FMA4(VFNMADDPS)
|
||||
FMA4(VFNMADDPD)
|
||||
FMA4(VFNMADDSS)
|
||||
FMA4(VFNMADDSD)
|
||||
FMA4(VFNMSUBPS)
|
||||
FMA4(VFNMSUBPD)
|
||||
FMA4(VFNMSUBSS)
|
||||
FMA4(VFNMSUBSD)
|
||||
#undef FMA4
|
||||
|
||||
// VEX GPR instructions
|
||||
void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
|
||||
void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
|
||||
|
Reference in New Issue
Block a user