diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 1c1a8b9754..eac841d162 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -1636,22 +1636,47 @@ void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, d void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only -// THESE TWO ARE UNTESTED. void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);} void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);} - void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);} void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);} +// Pretty much every x86 CPU nowadays supports SSE3, +// but the SSE2 fallbacks are easy. +void XEmitter::MOVSLDUP(X64Reg regOp, const OpArg& arg) +{ + if (cpu_info.bSSE3) + { + WriteSSEOp(0xF3, 0x12, regOp, arg); + } + else + { + if (!arg.IsSimpleReg(regOp)) + MOVAPD(regOp, arg); + UNPCKLPS(regOp, R(regOp)); + } +} +void XEmitter::MOVSHDUP(X64Reg regOp, const OpArg& arg) +{ + if (cpu_info.bSSE3) + { + WriteSSEOp(0xF3, 0x16, regOp, arg); + } + else + { + if (!arg.IsSimpleReg(regOp)) + MOVAPD(regOp, arg); + UNPCKHPS(regOp, R(regOp)); + } +} void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) { if (cpu_info.bSSE3) { - WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup + WriteSSEOp(0xF2, 0x12, regOp, arg); } else { - // Simulate this instruction with SSE2 instructions if (!arg.IsSimpleReg(regOp)) MOVSD(regOp, arg); UNPCKLPD(regOp, R(regOp)); diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index f6faea44bd..3232cafd55 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -581,9 +581,12 @@ public: void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); - // SSE/SSE2: Useful alternative to shuffle in some cases. + // SSE3 + void MOVSLDUP(X64Reg regOp, const OpArg& arg); + void MOVSHDUP(X64Reg regOp, const OpArg& arg); void MOVDDUP(X64Reg regOp, const OpArg& arg); + // SSE/SSE2: Useful alternative to shuffle in some cases. void UNPCKLPS(X64Reg dest, const OpArg& src); void UNPCKHPS(X64Reg dest, const OpArg& src); void UNPCKLPD(X64Reg dest, const OpArg& src); diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp index 766fc0436c..b6fe971546 100644 --- a/Source/UnitTests/Common/x64EmitterTest.cpp +++ b/Source/UnitTests/Common/x64EmitterTest.cpp @@ -721,6 +721,8 @@ TWO_OP_SSE_TEST(ANDNPD, "dqword") TWO_OP_SSE_TEST(ORPD, "dqword") TWO_OP_SSE_TEST(XORPD, "dqword") +TWO_OP_SSE_TEST(MOVSLDUP, "dqword") +TWO_OP_SSE_TEST(MOVSHDUP, "dqword") TWO_OP_SSE_TEST(MOVDDUP, "qword") TWO_OP_SSE_TEST(UNPCKLPS, "dqword")