From 2d4dd8cdc11ec54639691fe762562e4e7f44a232 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sat, 27 Apr 2019 12:54:43 +0100
Subject: [PATCH 1/3] x64Emitter: Prefer MOVAPS to MOVAPD

MOVAPS should always be preferred to MOVAPD, for two reasons:
* There has never been a microarchitecture with separate single and double domains.
* MOVAPD is one byte longer than MOVAPS
---
 Source/Core/Common/x64Emitter.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index eb756c3793..19a982d1c2 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -2180,7 +2180,11 @@ void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg)
 }
 void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg)
 {
-  WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);
+  // Prefer MOVAPS to MOVAPD as there is no reason to use MOVAPD over MOVAPS:
+  // - They have equivalent functionality.
+  // - There has never been a microarchitecture with separate single and double domains.
+  // - MOVAPD is one byte longer than MOVAPS.
+  MOVAPS(regOp, arg);
 }
 void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp)
 {
@@ -2188,7 +2192,7 @@ void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp)
 }
 void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp)
 {
-  WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);
+  MOVAPS(arg, regOp);
 }
 
 void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg)

From 1baa8ee970c726da1aeb6a1c01f955fe28f79e57 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sat, 27 Apr 2019 12:56:05 +0100
Subject: [PATCH 2/3] x64Emitter: Prefer MOVAPS to MOVSD

* The high half of regOp is immediately overwritten so the value in it is irrelevant.
* MOVSD produces an unnecessary dependency on the high half of regOp.
* MOVAPS is implemented as a register rename on modern microarchitectures.
---
 Source/Core/Common/x64Emitter.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 19a982d1c2..42258d1a84 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -2429,8 +2429,14 @@ void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
   }
   else
   {
-    if (!arg.IsSimpleReg(regOp))
+    if (!arg.IsSimpleReg())
+    {
       MOVSD(regOp, arg);
+    }
+    else if (regOp != arg.GetSimpleReg())
+    {
+      MOVAPD(regOp, arg);
+    }
     UNPCKLPD(regOp, R(regOp));
   }
 }

From e06111e86fe1ea145044e7acc848b1253babe6d4 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Tue, 23 Apr 2019 11:46:50 +0100
Subject: [PATCH 3/3] EmuCodeBlock: Prefer MOVAPS to MOVSD in
 ConvertDoubleToSingle

* The high half of the register is immediately masked so the value in it is irrelevant.
* MOVSD produces an unnecessary dependency on the high half of the destination register.
* MOVAPS is implemented as a register rename on modern microarchitectures.
---
 Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
index c159744b56..23d6dbceae 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
@@ -894,7 +894,7 @@ alignas(16) static const __m128i double_qnan_bit = _mm_set_epi64x(0xffffffffffff
 // unless the exponent is in the range of 874 to 896.
 void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
 {
-  MOVSD(XMM1, R(src));
+  MOVAPD(XMM1, R(src));
 
   // Grab Exponent
   PAND(XMM1, MConst(double_exponent));
@@ -914,7 +914,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
   PSUBQ(XMM0, R(XMM1));
 
   // xmm1 = fraction | 0x0010000000000000
-  MOVSD(XMM1, R(src));
+  MOVAPD(XMM1, R(src));
   PAND(XMM1, MConst(double_fraction));
   POR(XMM1, MConst(double_explicit_top_bit));
 
@@ -922,7 +922,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
   PSRLQ(XMM1, R(XMM0));
 
   // OR the sign bit in.
-  MOVSD(XMM0, R(src));
+  MOVAPD(XMM0, R(src));
   PAND(XMM0, MConst(double_sign_bit));
   PSRLQ(XMM0, 32);
   POR(XMM1, R(XMM0));
@@ -934,12 +934,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
   // Don't Denormalize
 
   // We want bits 0, 1
-  MOVSD(XMM1, R(src));
+  MOVAPD(XMM1, R(src));
   PAND(XMM1, MConst(double_top_two_bits));
   PSRLQ(XMM1, 32);
 
   // And 5 through to 34
-  MOVSD(XMM0, R(src));
+  MOVAPD(XMM0, R(src));
   PAND(XMM0, MConst(double_bottom_bits));
   PSRLQ(XMM0, 29);