From d226b8f8256a82437446d601e0e77d57d94289fe Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Fri, 1 Jan 2021 19:39:24 +0100
Subject: [PATCH 1/5] Arm64Emitter: Remove optimize parameter from MOVI2R

I don't really see the use of this. (Maybe in the past it was used for
when we needed a constant number of instructions for backpatching? But
we don't use MOVI2R for that now.)
---
 Source/Core/Common/Arm64Emitter.cpp                 | 13 +++++--------
 Source/Core/Common/Arm64Emitter.h                   |  2 +-
 .../Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp |  2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index c3cb492a5f..29cd71a132 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -2004,7 +2004,7 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
 }
 
 // Wrapper around MOVZ+MOVK (and later MOVN)
-void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
+void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm)
 {
   unsigned int parts = Is64Bit(Rd) ? 4 : 2;
   BitSet32 upload_part(0);
@@ -2041,13 +2041,10 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
   // XXX: Use MOVN when possible.
   // XXX: Optimize more
   // XXX: Support rotating immediates to save instructions
-  if (optimize)
+  for (unsigned int i = 0; i < parts; ++i)
   {
-    for (unsigned int i = 0; i < parts; ++i)
-    {
-      if ((imm >> (i * 16)) & 0xFFFF)
-        upload_part[i] = 1;
-    }
+    if ((imm >> (i * 16)) & 0xFFFF)
+      upload_part[i] = 1;
   }
 
   u64 aligned_pc = (u64)GetCodePtr() & ~0xFFF;
@@ -2090,7 +2087,7 @@
   }
   else
   {
-    if (upload_part[i] || !optimize)
+    if (upload_part[i])
       MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
   }
 }
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 7a43495aab..18d4c29c7f 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -865,7 +865,7 @@ public:
   void ADRP(ARM64Reg Rd, s32 imm);
 
   // Wrapper around MOVZ+MOVK
-  void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
+  void MOVI2R(ARM64Reg Rd, u64 imm);
   bool MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2);
   template <class P>
   void MOVP2R(ARM64Reg Rd, P* ptr)
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index dbcf424aa2..6a4600d67c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -912,7 +912,7 @@ void JitArm64::subfex(UGeckoInstruction inst)
   ARM64Reg WA = gpr.GetReg();
   if (js.carryFlagSet)
   {
-    MOVI2R(WA, ~i + j, gpr.R(d));
+    MOVI2R(WA, ~i + j);
     ADC(gpr.R(d), WA, WZR);
   }
   else

From 4e107935ace514e55adacd2543b8fedacaaea6cd Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Wed, 13 Jan 2021 11:04:13 +0100
Subject: [PATCH 2/5] Arm64Emitter: Allow specifying the 21st bit of ADRP imm

---
 Source/Core/Common/Arm64Emitter.cpp | 4 ++--
 Source/Core/Common/Arm64Emitter.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 29cd71a132..5aae03ef08 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1998,9 +1998,9 @@ void ARM64XEmitter::ADR(ARM64Reg Rd, s32 imm)
 {
   EncodeAddressInst(0, Rd, imm);
 }
 
-void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
+void ARM64XEmitter::ADRP(ARM64Reg Rd, s64 imm)
 {
-  EncodeAddressInst(1, Rd, imm >> 12);
+  EncodeAddressInst(1, Rd, static_cast<s32>(imm >> 12));
 }
 
 // Wrapper around MOVZ+MOVK (and later MOVN)
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 18d4c29c7f..7d96fb836b 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -862,7 +862,7 @@ public:
   // Address of label/page PC-relative
   void ADR(ARM64Reg Rd, s32 imm);
-  void ADRP(ARM64Reg Rd, s32 imm);
+  void ADRP(ARM64Reg Rd, s64 imm);
 
   // Wrapper around MOVZ+MOVK
   void MOVI2R(ARM64Reg Rd, u64 imm);

From 0d5ed06daf0b50d185417a0f2f1d613962588a55 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Fri, 1 Jan 2021 20:43:34 +0100
Subject: [PATCH 3/5] Arm64Emitter: Improve MOVI2R

More or less a complete rewrite of the function which aims to be
equally good or better for each given input, without relying on special
cases like the old implementation did. In particular, we now have more
extensive support for MOVN, as mentioned in a TODO comment.
---
 Source/Core/Common/Arm64Emitter.cpp | 234 ++++++++++++++++++----------
 Source/Core/Common/Arm64Emitter.h   |   5 +-
 2 files changed, 152 insertions(+), 87 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 5aae03ef08..e7e4deae88 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "Common/Align.h"
@@ -2003,94 +2004,155 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s64 imm)
   EncodeAddressInst(1, Rd, static_cast<s32>(imm >> 12));
 }
 
-// Wrapper around MOVZ+MOVK (and later MOVN)
+template <typename T, size_t max_size>
+class SmallVector final
+{
+public:
+  SmallVector() = default;
+  explicit SmallVector(size_t size) : m_size(size) {}
+
+  void push_back(const T& x) { m_array[m_size++] = x; }
+  void push_back(T&& x) { m_array[m_size++] = std::move(x); }
+
+  template <typename... Args>
+  T& emplace_back(Args&&... args)
+  {
+    return m_array[m_size++] = T{std::forward<Args>(args)...};
+  }
+
+  T& operator[](size_t i) { return m_array[i]; }
+  const T& operator[](size_t i) const { return m_array[i]; }
+
+  size_t size() const { return m_size; }
+  bool empty() const { return m_size == 0; }
+
+private:
+  std::array<T, max_size> m_array{};
+  size_t m_size = 0;
+};
+
+template <typename T>
+void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
+{
+  enum class Approach
+  {
+    MOVZ,
+    MOVN,
+    ADR,
+    ADRP,
+  };
+
+  struct Part
+  {
+    Part() = default;
+    Part(u16 imm_, ShiftAmount shift_) : imm(imm_), shift(shift_) {}
+
+    u16 imm;
+    ShiftAmount shift;
+  };
+
+  constexpr size_t max_parts = sizeof(T) / 2;
+
+  SmallVector<Part, max_parts> best_parts;
+  Approach best_approach;
+
+  const auto try_base = [&](T base, Approach approach, bool first_time) {
+    SmallVector<Part, max_parts> parts;
+
+    for (size_t i = 0; i < max_parts; ++i)
+    {
+      const size_t shift = i * 16;
+      const u16 imm_shifted = static_cast<u16>(imm >> shift);
+      const u16 base_shifted = static_cast<u16>(base >> shift);
+      if (imm_shifted != base_shifted)
+        parts.emplace_back(imm_shifted, static_cast<ShiftAmount>(i));
+    }
+
+    if (first_time || parts.size() < best_parts.size())
+    {
+      best_parts = std::move(parts);
+      best_approach = approach;
+    }
+  };
+
+  const auto sext_21_bit = [](u64 x) {
+    return static_cast<s64>((x & 0x1FFFFF) | (x & 0x100000 ? ~0x1FFFFF : 0));
+  };
+
+  const u64 pc = reinterpret_cast<u64>(GetCodePtr());
+  const s64 adrp_offset = sext_21_bit((imm >> 12) - (pc >> 12)) << 12;
+  const s64 adr_offset = sext_21_bit(imm - pc);
+  const u64 adrp_base = (pc & ~0xFFF) + adrp_offset;
+  const u64 adr_base = pc + adr_offset;
+
+  // First: Try approaches for which instruction_count = max(parts.size(), 1)
+  try_base(T(0), Approach::MOVZ, true);
+  try_base(~T(0), Approach::MOVN, false);
+
+  // Second: Try approaches for which instruction_count = parts.size() + 1
+  if constexpr (sizeof(T) == 8)
+  {
+    try_base(adrp_base, Approach::ADRP, false);
+    try_base(adr_base, Approach::ADR, false);
+  }
+
+  size_t parts_uploaded = 0;
+
+  // To kill any dependencies, we start with an instruction that overwrites the entire register
+  switch (best_approach)
+  {
+  case Approach::MOVZ:
+    if (best_parts.empty())
+      best_parts.emplace_back(u16(0), ShiftAmount::Shift0);
+
+    MOVZ(Rd, best_parts[0].imm, best_parts[0].shift);
+    ++parts_uploaded;
+    break;
+
+  case Approach::MOVN:
+    if (best_parts.empty())
+      best_parts.emplace_back(u16(0xFFFF), ShiftAmount::Shift0);
+
+    MOVN(Rd, static_cast<u16>(~best_parts[0].imm), best_parts[0].shift);
+    ++parts_uploaded;
+    break;
+
+  case Approach::ADR:
+    ADR(Rd, adr_offset);
+    break;
+
+  case Approach::ADRP:
+    ADRP(Rd, adrp_offset);
+    break;
+  }
+
+  // And then we use MOVK for the remaining parts
+  for (; parts_uploaded < best_parts.size(); ++parts_uploaded)
+  {
+    const Part& part = best_parts[parts_uploaded];
+
+    if (best_approach == Approach::ADRP && part.shift == ShiftAmount::Shift0)
+    {
+      // The combination of ADRP followed by ADD immediate is specifically optimized in hardware
+      ASSERT(part.imm == (adrp_base & 0xF000) + (part.imm & 0xFFF));
+      ADD(Rd, Rd, part.imm & 0xFFF);
+    }
+    else
+    {
+      MOVK(Rd, part.imm, part.shift);
+    }
+  }
+}
+
+template void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, u64 imm);
+template void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, u32 imm);
+
 void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm)
 {
-  unsigned int parts = Is64Bit(Rd) ? 4 : 2;
-  BitSet32 upload_part(0);
-
-  // Always start with a movz! Kills the dependency on the register.
-  bool use_movz = true;
-
-  if (!imm)
-  {
-    // Zero immediate, just clear the register. EOR is pointless when we have MOVZ, which looks
-    // clearer in disasm too.
-    MOVZ(Rd, 0, ShiftAmount::Shift0);
-    return;
-  }
-
-  if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) ||
-      (!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max()))
-  {
-    // Max unsigned value (or if signed, -1)
-    // Set to ~ZR
-    ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;
-    ORN(Rd, ZR, ZR, ArithOption(ZR, ShiftType::LSL, 0));
-    return;
-  }
-
-  // TODO: Make some more systemic use of MOVN, but this will take care of most cases.
-  // Small negative integer. Use MOVN
-  if (!Is64Bit(Rd) && (imm | 0xFFFF0000) == imm)
-  {
-    MOVN(Rd, ~imm, ShiftAmount::Shift0);
-    return;
-  }
-
-  // XXX: Use MOVN when possible.
-  // XXX: Optimize more
-  // XXX: Support rotating immediates to save instructions
-  for (unsigned int i = 0; i < parts; ++i)
-  {
-    if ((imm >> (i * 16)) & 0xFFFF)
-      upload_part[i] = 1;
-  }
-
-  u64 aligned_pc = (u64)GetCodePtr() & ~0xFFF;
-  s64 aligned_offset = (s64)imm - (s64)aligned_pc;
-  // The offset for ADR/ADRP is an s32, so make sure it can be represented in that
-  if (upload_part.Count() > 1 && std::abs(aligned_offset) < 0x7FFFFFFFLL)
-  {
-    // Immediate we are loading is within 4GB of our aligned range
-    // Most likely a address that we can load in one or two instructions
-    if (!(std::abs(aligned_offset) & 0xFFF))
-    {
-      // Aligned ADR
-      ADRP(Rd, (s32)aligned_offset);
-      return;
-    }
-    else
-    {
-      // If the address is within 1MB of PC we can load it in a single instruction still
-      s64 offset = (s64)imm - (s64)GetCodePtr();
-      if (offset >= -0xFFFFF && offset <= 0xFFFFF)
-      {
-        ADR(Rd, (s32)offset);
-        return;
-      }
-      else
-      {
-        ADRP(Rd, (s32)(aligned_offset & ~0xFFF));
-        ADD(Rd, Rd, imm & 0xFFF);
-        return;
-      }
-    }
-  }
-
-  for (unsigned i = 0; i < parts; ++i)
-  {
-    if (use_movz && upload_part[i])
-    {
-      MOVZ(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
-      use_movz = false;
-    }
-    else
-    {
-      if (upload_part[i])
-        MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
-    }
-  }
+  if (Is64Bit(Rd))
+    MOVI2RImpl(Rd, imm);
+  else
+    MOVI2RImpl(Rd, static_cast<u32>(imm));
 }
 
 bool ARM64XEmitter::MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2)
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 7d96fb836b..2a434d4bcb 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -521,6 +521,9 @@ private:
   void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
   void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
 
+  template <typename T>
+  void MOVI2RImpl(ARM64Reg Rd, T imm);
+
 protected:
   void Write32(u32 value);
 
@@ -864,7 +867,7 @@ public:
   void ADR(ARM64Reg Rd, s32 imm);
   void ADRP(ARM64Reg Rd, s64 imm);
 
-  // Wrapper around MOVZ+MOVK
+  // Wrapper around ADR/ADRP/MOVZ/MOVN/MOVK
   void MOVI2R(ARM64Reg Rd, u64 imm);
   bool MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2);
   template <class P>

From 9ad4f724e4497734eda69a55a742761f88f85f4c Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sun, 3 Jan 2021 13:36:37 +0100
Subject: [PATCH 4/5] Arm64Emitter: Use ORR in MOVI2R

---
 Source/Core/Common/Arm64Emitter.cpp | 69 ++++++++++++++++++++++-------
 Source/Core/Common/Arm64Emitter.h   | 12 ++---
 2 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index e7e4deae88..ce40123bf1 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -2040,6 +2040,7 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
     MOVN,
     ADR,
     ADRP,
+    ORR,
   };
 
   struct Part
@@ -2055,6 +2056,12 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
 
   SmallVector<Part, max_parts> best_parts;
   Approach best_approach;
+  u64 best_base;
+
+  const auto instructions_required = [](const SmallVector<Part, max_parts>& parts,
+                                        Approach approach) {
+    return parts.size() + (approach > Approach::MOVN);
+  };
 
   const auto try_base = [&](T base, Approach approach, bool first_time) {
     SmallVector<Part, max_parts> parts;
@@ -2068,34 +2075,54 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
       parts.emplace_back(imm_shifted, static_cast<ShiftAmount>(i));
     }
 
-    if (first_time || parts.size() < best_parts.size())
+    if (first_time ||
+        instructions_required(parts, approach) < instructions_required(best_parts, best_approach))
     {
       best_parts = std::move(parts);
       best_approach = approach;
+      best_base = base;
     }
   };
 
+  // Try MOVZ/MOVN
+  try_base(T(0), Approach::MOVZ, true);
+  try_base(~T(0), Approach::MOVN, false);
+
+  // Try PC-relative approaches
   const auto sext_21_bit = [](u64 x) {
     return static_cast<s64>((x & 0x1FFFFF) | (x & 0x100000 ? ~0x1FFFFF : 0));
   };
-
   const u64 pc = reinterpret_cast<u64>(GetCodePtr());
   const s64 adrp_offset = sext_21_bit((imm >> 12) - (pc >> 12)) << 12;
   const s64 adr_offset = sext_21_bit(imm - pc);
   const u64 adrp_base = (pc & ~0xFFF) + adrp_offset;
   const u64 adr_base = pc + adr_offset;
-
-  // First: Try approaches for which instruction_count = max(parts.size(), 1)
-  try_base(T(0), Approach::MOVZ, true);
-  try_base(~T(0), Approach::MOVN, false);
-
-  // Second: Try approaches for which instruction_count = parts.size() + 1
   if constexpr (sizeof(T) == 8)
   {
     try_base(adrp_base, Approach::ADRP, false);
     try_base(adr_base, Approach::ADR, false);
   }
 
+  // Try ORR (or skip it if we already have a 1-instruction encoding - these tests are non-trivial)
+  if (instructions_required(best_parts, best_approach) > 1)
+  {
+    if constexpr (sizeof(T) == 8)
+    {
+      for (u64 orr_imm : {(imm << 32) | (imm & 0x0000'0000'FFFF'FFFF),
+                          (imm & 0xFFFF'FFFF'0000'0000) | (imm >> 32),
+                          (imm << 48) | (imm & 0x0000'FFFF'FFFF'0000) | (imm >> 48)})
+      {
+        if (IsImmLogical(orr_imm, 64))
+          try_base(orr_imm, Approach::ORR, false);
+      }
+    }
+    else
+    {
+      if (IsImmLogical(imm, 32))
+        try_base(imm, Approach::ORR, false);
+    }
+  }
+
   size_t parts_uploaded = 0;
 
   // To kill any dependencies, we start with an instruction that overwrites the entire register
@@ -2124,6 +2151,12 @@ void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
   case Approach::ADRP:
     ADRP(Rd, adrp_offset);
     break;
+
+  case Approach::ORR:
+    constexpr ARM64Reg zero_reg = sizeof(T) == 8 ? ZR : WZR;
+    const bool success = TryORRI2R(Rd, zero_reg, best_base);
+    ASSERT(success);
+    break;
   }
 
   // And then we use MOVK for the remaining parts
@@ -4330,7 +4363,7 @@ void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
   ADDI2R_internal(Is64Bit(Rn) ? ZR : WZR, Rn, imm, true, true, scratch);
 }
 
-bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
+bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
   if (const auto result = IsImmArithmetic(imm))
   {
@@ -4342,7 +4375,7 @@
   return false;
 }
 
-bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
+bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
   if (const auto result = IsImmArithmetic(imm))
   {
@@ -4354,7 +4387,7 @@
   return false;
 }
 
-bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u32 imm)
+bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u64 imm)
 {
   if (const auto result = IsImmArithmetic(imm))
   {
@@ -4366,9 +4399,9 @@
   return false;
 }
 
-bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
+bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
-  if (const auto result = IsImmLogical(imm, 32))
+  if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
   {
     const auto& [n, imm_s, imm_r] = *result;
     AND(Rd, Rn, imm_r, imm_s, n != 0);
@@ -4377,9 +4410,9 @@
   return false;
 }
 
-bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
+bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
-  if (const auto result = IsImmLogical(imm, 32))
+  if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
   {
     const auto& [n, imm_s, imm_r] = *result;
     ORR(Rd, Rn, imm_r, imm_s, n != 0);
@@ -4388,9 +4422,9 @@
   return false;
 }
 
-bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm)
+bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
 {
-  if (const auto result = IsImmLogical(imm, 32))
+  if (const auto result = IsImmLogical(imm, Is64Bit(Rd) ? 64 : 32))
   {
     const auto& [n, imm_s, imm_r] = *result;
     EOR(Rd, Rn, imm_r, imm_s, n != 0);
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 2a434d4bcb..874f1d2fed 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -896,13 +896,13 @@ public:
   void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
   void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
 
-  bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-  bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-  bool TryCMPI2R(ARM64Reg Rn, u32 imm);
+  bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+  bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+  bool TryCMPI2R(ARM64Reg Rn, u64 imm);
 
-  bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-  bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
-  bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+  bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+  bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
+  bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
 
   // ABI related
   void ABI_PushRegisters(BitSet32 registers);

From eff66c2adce7395bd779a431ac4c91034f131269 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sat, 13 Feb 2021 11:54:46 +0100
Subject: [PATCH 5/5] Arm64Emitter: Expand the MOVI2R unit test a little

This tests for a bug with ADRP which was present in an earlier version
of this pull request.

Also adding the MOVI2R unit test to the VS build.
---
 .../Core/PowerPC/JitArm64/MovI2R.cpp | 24 +++++++++++++++++++
 Source/UnitTests/UnitTests.vcxproj   |  3 +++
 2 files changed, 27 insertions(+)

diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/MovI2R.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/MovI2R.cpp
index 8f44259f8d..c21e47c03d 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/MovI2R.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/MovI2R.cpp
@@ -78,20 +78,44 @@ TEST(JitArm64, MovI2R_ADR)
 {
   TestMovI2R test;
   const u64 base = Common::BitCast<u64>(test.GetCodePtr());
+
+  // Test offsets around 0
   for (s64 i = -0x20000; i < 0x20000; i++)
   {
     const u64 offset = static_cast<u64>(i);
     test.Check64(base + offset);
   }
+
+  // Test offsets around the maximum
+  for (const s64 i : {-0x200000ll, 0x200000ll})
+  {
+    for (s64 j = -4; j < 4; j++)
+    {
+      const u64 offset = static_cast<u64>(i + j);
+      test.Check64(base + offset);
+    }
+  }
 }
 
 TEST(JitArm64, MovI2R_ADRP)
 {
   TestMovI2R test;
   const u64 base = Common::BitCast<u64>(test.GetCodePtr()) & ~0xFFF;
+
+  // Test offsets around 0
   for (s64 i = -0x20000; i < 0x20000; i++)
   {
     const u64 offset = static_cast<u64>(i) << 12;
     test.Check64(base + offset);
   }
+
+  // Test offsets around the maximum
+  for (const s64 i : {-0x100000000ll, -0x80000000ll, 0x80000000ll, 0x100000000ll})
+  {
+    for (s64 j = -4; j < 4; j++)
+    {
+      const u64 offset = static_cast<u64>(i + (j << 12));
+      test.Check64(base + offset);
+    }
+  }
 }
diff --git a/Source/UnitTests/UnitTests.vcxproj b/Source/UnitTests/UnitTests.vcxproj
index f758af5bed..230ac50412 100644
--- a/Source/UnitTests/UnitTests.vcxproj
+++ b/Source/UnitTests/UnitTests.vcxproj
@@ -78,6 +78,9 @@
+
+
+
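
Reviewer note (not part of the patch series): the selection logic in patches 3 and 4 boils down to counting how many 16-bit chunks of the immediate differ from a base pattern, then picking the cheapest base. Below is a minimal standalone sketch of that part-counting idea for the MOVZ/MOVN choice only; count_parts, main, and the sample constant are made up for this illustration and do not appear in the patches.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Count how many 16-bit chunks of imm differ from the corresponding
// chunks of base. With base = 0 this is the MOVZ+MOVK instruction
// count; with base = ~0 it is the MOVN+MOVK count. Hypothetical helper,
// for illustration only.
static int count_parts(uint64_t imm, uint64_t base)
{
  int parts = 0;
  for (int i = 0; i < 4; ++i)
  {
    if (static_cast<uint16_t>(imm >> (i * 16)) !=
        static_cast<uint16_t>(base >> (i * 16)))
      ++parts;
  }
  return parts;
}

int main()
{
  // 0xFFFFFFFFFFFF1234 costs four instructions on the MOVZ path but only
  // one on the MOVN path, since every chunk except the lowest already
  // matches the all-ones base.
  const uint64_t imm = 0xFFFFFFFFFFFF1234ULL;
  const int movz_cost = std::max(count_parts(imm, 0), 1);      // at least one instruction
  const int movn_cost = std::max(count_parts(imm, ~0ULL), 1);  // even if no chunks differ
  std::printf("MOVZ path: %d, MOVN path: %d\n", movz_cost, movn_cost);
  return 0;
}

The ADR/ADRP and ORR bases slot into the same comparison at a cost of parts.size() + 1, which is exactly what patch 4's instructions_required lambda expresses: materializing the base itself takes one extra instruction.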