diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 1e69976bf7..a2dde863cd 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -205,6 +205,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
   // we must mark them as no longer discarded
   gpr.ResetRegisters(js.op->regsOut);
   fpr.ResetRegisters(js.op->GetFregsOut());
+  gpr.ResetCRRegisters(js.op->crOut);
 
   if (js.op->opinfo->flags & FL_ENDBLOCK)
   {
@@ -1222,9 +1223,11 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
       {
         gpr.DiscardRegisters(op.gprDiscardable);
         fpr.DiscardRegisters(op.fprDiscardable);
+        gpr.DiscardCRRegisters(op.crDiscardable);
       }
       gpr.StoreRegisters(~op.gprInUse & (op.regsIn | op.regsOut));
       fpr.StoreRegisters(~op.fprInUse & (op.fregsIn | op.GetFregsOut()));
+      gpr.StoreCRRegisters(~op.crInUse & (op.crIn | op.crOut));
 
       if (opinfo->flags & FL_LOADSTORE)
         ++js.numLoadStoreInst;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index d4e933aaf3..5da455cb61 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -129,6 +129,8 @@ void Arm64RegCache::DiscardRegister(size_t preg)
 {
   OpArg& reg = m_guest_registers[preg];
   ARM64Reg host_reg = reg.GetReg();
+  if (!IsVector(host_reg))
+    host_reg = EncodeRegTo32(host_reg);
 
   reg.Discard();
   if (host_reg != ARM64Reg::INVALID_REG)
@@ -288,6 +290,25 @@ void Arm64GPRCache::FlushCRRegisters(BitSet8 regs, bool maintain_state, ARM64Reg
   }
 }
 
+void Arm64GPRCache::DiscardCRRegisters(BitSet8 regs)
+{
+  for (const int cr_field : regs)  // visit every CR field selected in the mask
+    DiscardRegister(GUEST_CR_OFFSET + cr_field);
+}
+
+void Arm64GPRCache::ResetCRRegisters(BitSet8 regs)
+{
+  for (const int cr_field : regs)
+  {
+    OpArg& guest_cr = m_guest_registers[GUEST_CR_OFFSET + cr_field];
+    const ARM64Reg bound_host_reg = guest_cr.GetReg();
+    // Resetting is only expected while GetReg() reports no valid host register.
+    ASSERT_MSG(DYNA_REC, bound_host_reg == ARM64Reg::INVALID_REG,
+               "Attempted to reset a loaded register (did you mean to flush it?)");
+    guest_cr.Flush();
+  }
+}
+
 void Arm64GPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg)
 {
   FlushRegisters(BitSet32(0xFFFFFFFF), mode == FlushMode::MaintainState, tmp_reg);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index fbe1f30134..dad43ce9ef 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -335,6 +335,9 @@ public:
     FlushCRRegisters(regs, false, tmp_reg);
   }
 
+  void DiscardCRRegisters(BitSet8 regs);
+  void ResetCRRegisters(BitSet8 regs);
+
 protected:
   // Get the order of the host registers
   void GetAllocationOrder() override;
diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp
index 9cfbbdec6b..d7c4a09a64 100644
--- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp
+++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp
@@ -222,26 +222,13 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
   // [1] https://bugs.dolphin-emu.org/issues/5864#note-7
   if (a.canCauseException || b.canCauseException)
     return false;
-  if (a_flags & FL_ENDBLOCK)
+  if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
     return false;
-  if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
-    return false;
-  if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
+  if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
     return false;
   if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
     return false;
 
-  switch (b.inst.OPCD)
-  {
-  case 16:
-  case 18:
-  // branches. Do not swap.
-  case 17:  // sc
-  case 46:  // lmw
-  case 19:  // table19 - lots of tricky stuff
-    return false;
-  }
-
   // For now, only integer ops are acceptable.
   if (b_info->type != OpType::Integer)
     return false;
@@ -249,16 +236,22 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
   // Check that we have no register collisions.
   // That is, check that none of b's outputs matches any of a's inputs,
   // and that none of a's outputs matches any of b's inputs.
-  // The latter does not apply if a is a cmp, of course, but doesn't hurt to check.
+
   // register collision: b outputs to one of a's inputs
   if (b.regsOut & a.regsIn)
     return false;
+  if (b.crOut & a.crIn)
+    return false;
   // register collision: a outputs to one of b's inputs
   if (a.regsOut & b.regsIn)
     return false;
+  if (a.crOut & b.crIn)
+    return false;
   // register collision: b outputs to one of a's outputs (overwriting it)
   if (b.regsOut & a.regsOut)
     return false;
+  if (b.crOut & a.crOut)
+    return false;
 
   return true;
 }
@@ -451,12 +444,6 @@ void FindFunctions(const Core::CPUThreadGuard& guard, u32 startAddr, u32 endAddr
                unniceSize);
 }
 
-static bool isCmp(const CodeOp& a)
-{
-  return (a.inst.OPCD == 10 || a.inst.OPCD == 11) ||
-         (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32));
-}
-
 static bool isCarryOp(const CodeOp& a)
 {
   return (a.opinfo->flags & FL_SET_CA) && !(a.opinfo->flags & FL_SET_OE) &&
@@ -506,7 +493,7 @@ void PPCAnalyzer::ReorderInstructionsCore(u32 instructions, CodeOp* code, bool r
     // Reorder integer compares, rlwinm., and carry-affecting ops
     // (if we add more merged branch instructions, add them here!)
     if ((type == ReorderType::CROR && isCror(a)) || (type == ReorderType::Carry && isCarryOp(a)) ||
-        (type == ReorderType::CMP && (isCmp(a) || a.outputCR[0])))
+        (type == ReorderType::CMP && a.crOut))
     {
       // once we're next to a carry instruction, don't move away!
       if (type == ReorderType::Carry && i != start)
@@ -544,11 +531,6 @@ void PPCAnalyzer::ReorderInstructionsCore(u32 instructions, CodeOp* code, bool r
 
 void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp* code) const
 {
-  // Reorder cror instructions upwards (e.g. towards an fcmp). Technically we should be more
-  // picky about this, but cror seems to almost solely be used for this purpose in real code.
-  // Additionally, the other boolean ops seem to almost never be used.
-  if (HasOption(OPTION_CROR_MERGE))
-    ReorderInstructionsCore(instructions, code, true, ReorderType::CROR);
   // For carry, bubble instructions *towards* each other; one direction often isn't enough
   // to get pairs like addc/adde next to each other.
   if (HasOption(OPTION_CARRY_MERGE))
@@ -556,8 +538,16 @@ void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp* code) const
     ReorderInstructionsCore(instructions, code, false, ReorderType::Carry);
     ReorderInstructionsCore(instructions, code, true, ReorderType::Carry);
   }
+
+  // Reorder instructions which write to CR (typically compare instructions) towards branches.
   if (HasOption(OPTION_BRANCH_MERGE))
     ReorderInstructionsCore(instructions, code, false, ReorderType::CMP);
+
+  // Reorder cror instructions upwards (e.g. towards an fcmp). Technically we should be more
+  // picky about this, but cror seems to almost solely be used for this purpose in real code.
+  // Additionally, the other boolean ops seem to almost never be used.
+  if (HasOption(OPTION_CROR_MERGE))
+    ReorderInstructionsCore(instructions, code, true, ReorderType::CROR);
 }
 
 void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code,
@@ -570,23 +560,40 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code,
     block->m_fpa->any = true;
   }
 
-  code->wantsCR = BitSet8(0);
+  code->crIn = BitSet8(0);
   if (opinfo->flags & FL_READ_ALL_CR)
-    code->wantsCR = BitSet8(0xFF);
+  {
+    code->crIn = BitSet8(0xFF);
+  }
   else if (opinfo->flags & FL_READ_CRn)
-    code->wantsCR[code->inst.CRFS] = true;
+  {
+    code->crIn[code->inst.CRFS] = true;
+  }
   else if (opinfo->flags & FL_READ_CR_BI)
-    code->wantsCR[code->inst.BI] = true;
+  {
+    code->crIn[code->inst.BI] = true;
+  }
+  else if (opinfo->type == OpType::CR)
+  {
+    code->crIn[code->inst.CRBA >> 2] = true;
+    code->crIn[code->inst.CRBB >> 2] = true;
 
-  code->outputCR = BitSet8(0);
+    // CR instructions only write to one bit of the destination CR,
+    // so treat the other three bits of the destination as inputs
+    code->crIn[code->inst.CRBD >> 2] = true;
+  }
+
+  code->crOut = BitSet8(0);
   if (opinfo->flags & FL_SET_ALL_CR)
-    code->outputCR = BitSet8(0xFF);
+    code->crOut = BitSet8(0xFF);
   else if (opinfo->flags & FL_SET_CRn)
-    code->outputCR[code->inst.CRFD] = true;
+    code->crOut[code->inst.CRFD] = true;
   else if ((opinfo->flags & FL_SET_CR0) || ((opinfo->flags & FL_RC_BIT) && code->inst.Rc))
-    code->outputCR[0] = true;
+    code->crOut[0] = true;
   else if ((opinfo->flags & FL_SET_CR1) || ((opinfo->flags & FL_RC_BIT_F) && code->inst.Rc))
-    code->outputCR[1] = true;
+    code->crOut[1] = true;
+  else if (opinfo->type == OpType::CR)
+    code->crOut[code->inst.CRBD >> 2] = true;
 
   code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0;
   code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
@@ -955,9 +962,9 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
 
   // Scan for flag dependencies; assume the next block (or any branch that can leave the block)
   // wants flags, to be safe.
-  BitSet8 wantsCR = BitSet8(0xFF);
   bool wantsFPRF = true;
   bool wantsCA = true;
+  BitSet8 crInUse, crDiscardable;
   BitSet32 gprBlockInputs, gprInUse, fprInUse, gprDiscardable, fprDiscardable, fprInXmm;
   for (int i = block->m_num_instructions - 1; i >= 0; i--)
   {
@@ -974,27 +981,26 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
     const bool hle = !!HLE::TryReplaceFunction(op.address);
     const bool may_exit_block = hle || op.canEndBlock || op.canCauseException;
 
-    const BitSet8 opWantsCR = op.wantsCR;
     const bool opWantsFPRF = op.wantsFPRF;
     const bool opWantsCA = op.wantsCA;
-    op.wantsCR = wantsCR | BitSet8(may_exit_block ? 0xFF : 0);
     op.wantsFPRF = wantsFPRF || may_exit_block;
     op.wantsCA = wantsCA || may_exit_block;
-    wantsCR |= opWantsCR | BitSet8(may_exit_block ? 0xFF : 0);
     wantsFPRF |= opWantsFPRF || may_exit_block;
     wantsCA |= opWantsCA || may_exit_block;
-    wantsCR &= ~op.outputCR | opWantsCR;
     wantsFPRF &= !op.outputFPRF || opWantsFPRF;
     wantsCA &= !op.outputCA || opWantsCA;
     op.gprInUse = gprInUse;
     op.fprInUse = fprInUse;
+    op.crInUse = crInUse;
     op.gprDiscardable = gprDiscardable;
     op.fprDiscardable = fprDiscardable;
+    op.crDiscardable = crDiscardable;
     op.fprInXmm = fprInXmm;
     gprBlockInputs &= ~op.regsOut;
     gprBlockInputs |= op.regsIn;
     gprInUse |= op.regsIn | op.regsOut;
     fprInUse |= op.fregsIn | op.GetFregsOut();
+    crInUse |= op.crIn | op.crOut;
 
     if (strncmp(op.opinfo->opname, "stfd", 4))
       fprInXmm |= op.fregsIn;
@@ -1006,11 +1012,13 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
       fprInXmm = BitSet32{};
       gprDiscardable = BitSet32{};
       fprDiscardable = BitSet32{};
+      crDiscardable = BitSet8{};
     }
     else if (op.canEndBlock || op.canCauseException)
     {
       gprDiscardable = BitSet32{};
       fprDiscardable = BitSet32{};
+      crDiscardable = BitSet8{};
     }
     else
     {
@@ -1018,6 +1026,8 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
       gprDiscardable &= ~op.regsIn;
       fprDiscardable |= op.GetFregsOut();
       fprDiscardable &= ~op.fregsIn;
+      crDiscardable |= op.crOut;
+      crDiscardable &= ~op.crIn;
     }
   }
 
diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h
index 1c24120cc9..1bd25ae476 100644
--- a/Source/Core/Core/PowerPC/PPCAnalyst.h
+++ b/Source/Core/Core/PowerPC/PPCAnalyst.h
@@ -32,10 +32,12 @@ struct CodeOp  // 16B
   const GekkoOPInfo* opinfo = nullptr;
   u32 address = 0;
   u32 branchTo = 0;  // if UINT32_MAX, not a branch
-  BitSet32 regsOut;
   BitSet32 regsIn;
+  BitSet32 regsOut;
   BitSet32 fregsIn;
   s8 fregOut = 0;
+  BitSet8 crIn;
+  BitSet8 crOut;
   bool isBranchTarget = false;
   bool branchUsesCtr = false;
   bool branchIsIdleLoop = false;
@@ -50,6 +52,8 @@ struct CodeOp  // 16B
   bool canCauseException = false;
   bool skipLRStack = false;
   bool skip = false;  // followed BL-s for example
+  BitSet8 crInUse;
+  BitSet8 crDiscardable;
   // which registers are still needed after this instruction in this block
   BitSet32 fprInUse;
   BitSet32 gprInUse;
diff --git a/Source/Core/Core/PowerPC/PPCTables.cpp b/Source/Core/Core/PowerPC/PPCTables.cpp
index 79ff5444a7..9d70fb662b 100644
--- a/Source/Core/Core/PowerPC/PPCTables.cpp
+++ b/Source/Core/Core/PowerPC/PPCTables.cpp
@@ -87,8 +87,8 @@ constexpr std::array<GekkoOPTemplate, 54> s_primary_table{{
     {38, "stb", OpType::Store, 1, FL_IN_A0 | FL_IN_S | FL_LOADSTORE},
     {39, "stbu", OpType::Store, 1, FL_OUT_A | FL_IN_A | FL_IN_S | FL_LOADSTORE},
 
-    {46, "lmw", OpType::System, 11, FL_EVIL | FL_IN_A0 | FL_LOADSTORE},
-    {47, "stmw", OpType::System, 11, FL_EVIL | FL_IN_A0 | FL_LOADSTORE},
+    {46, "lmw", OpType::System, 11, FL_IN_A0 | FL_LOADSTORE},
+    {47, "stmw", OpType::System, 11, FL_IN_A0 | FL_LOADSTORE},
 
     {48, "lfs", OpType::LoadFP, 1, FL_OUT_FLOAT_D | FL_IN_A | FL_USE_FPU | FL_LOADSTORE},
     {49, "lfsu", OpType::LoadFP, 1,
@@ -224,17 +224,17 @@ constexpr std::array<GekkoOPTemplate, 4> s_table4_3{{
 constexpr std::array<GekkoOPTemplate, 13> s_table19{{
     {528, "bcctrx", OpType::Branch, 1, FL_ENDBLOCK | FL_READ_CR_BI},
     {16, "bclrx", OpType::Branch, 1, FL_ENDBLOCK | FL_READ_CR_BI},
-    {257, "crand", OpType::CR, 1, FL_EVIL},
-    {129, "crandc", OpType::CR, 1, FL_EVIL},
-    {289, "creqv", OpType::CR, 1, FL_EVIL},
-    {225, "crnand", OpType::CR, 1, FL_EVIL},
-    {33, "crnor", OpType::CR, 1, FL_EVIL},
-    {449, "cror", OpType::CR, 1, FL_EVIL},
-    {417, "crorc", OpType::CR, 1, FL_EVIL},
-    {193, "crxor", OpType::CR, 1, FL_EVIL},
+    {257, "crand", OpType::CR, 1, 0},
+    {129, "crandc", OpType::CR, 1, 0},
+    {289, "creqv", OpType::CR, 1, 0},
+    {225, "crnand", OpType::CR, 1, 0},
+    {33, "crnor", OpType::CR, 1, 0},
+    {449, "cror", OpType::CR, 1, 0},
+    {417, "crorc", OpType::CR, 1, 0},
+    {193, "crxor", OpType::CR, 1, 0},
 
-    {150, "isync", OpType::InstructionCache, 1, FL_EVIL},
-    {0, "mcrf", OpType::System, 1, FL_EVIL | FL_SET_CRn | FL_READ_CRn},
+    {150, "isync", OpType::InstructionCache, 1, FL_NO_REORDER},
+    {0, "mcrf", OpType::System, 1, FL_SET_CRn | FL_READ_CRn},
 
     {50, "rfi", OpType::System, 2, FL_ENDBLOCK | FL_CHECKEXCEPTIONS | FL_PROGRAMEXCEPTION},
 }};
@@ -324,12 +324,12 @@ constexpr std::array<GekkoOPTemplate, 107> s_table31{{
     {790, "lhbrx", OpType::Load, 1, FL_OUT_D | FL_IN_A0B | FL_LOADSTORE},
 
     // Conditional load/store (Wii SMP)
-    {150, "stwcxd", OpType::Store, 1, FL_EVIL | FL_IN_S | FL_IN_A0B | FL_SET_CR0 | FL_LOADSTORE},
-    {20, "lwarx", OpType::Load, 1, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0 | FL_LOADSTORE},
+    {150, "stwcxd", OpType::Store, 1, FL_IN_S | FL_IN_A0B | FL_SET_CR0 | FL_LOADSTORE},
+    {20, "lwarx", OpType::Load, 1, FL_OUT_D | FL_IN_A0B | FL_SET_CR0 | FL_LOADSTORE},
 
     // load string (Inst these)
-    {533, "lswx", OpType::Load, 1, FL_EVIL | FL_IN_A0B | FL_OUT_D | FL_LOADSTORE},
-    {597, "lswi", OpType::Load, 1, FL_EVIL | FL_IN_A0 | FL_OUT_D | FL_LOADSTORE},
+    {533, "lswx", OpType::Load, 1, FL_IN_A0B | FL_OUT_D | FL_LOADSTORE},
+    {597, "lswi", OpType::Load, 1, FL_IN_A0 | FL_OUT_D | FL_LOADSTORE},
 
     // store word
     {151, "stwx", OpType::Store, 1, FL_IN_S | FL_IN_A0B | FL_LOADSTORE},
@@ -347,8 +347,8 @@ constexpr std::array<GekkoOPTemplate, 107> s_table31{{
     {662, "stwbrx", OpType::Store, 1, FL_IN_S | FL_IN_A0B | FL_LOADSTORE},
     {918, "sthbrx", OpType::Store, 1, FL_IN_S | FL_IN_A0B | FL_LOADSTORE},
 
-    {661, "stswx", OpType::Store, 1, FL_EVIL | FL_IN_A0B | FL_LOADSTORE},
-    {725, "stswi", OpType::Store, 1, FL_EVIL | FL_IN_A0 | FL_LOADSTORE},
+    {661, "stswx", OpType::Store, 1, FL_IN_A0B | FL_LOADSTORE},
+    {725, "stswi", OpType::Store, 1, FL_IN_A0 | FL_LOADSTORE},
 
     // fp load/store
     {535, "lfsx", OpType::LoadFP, 1, FL_OUT_FLOAT_D | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE},
diff --git a/Source/Core/Core/PowerPC/PPCTables.h b/Source/Core/Core/PowerPC/PPCTables.h
index 00d4baaf9f..afb265f840 100644
--- a/Source/Core/Core/PowerPC/PPCTables.h
+++ b/Source/Core/Core/PowerPC/PPCTables.h
@@ -36,18 +36,17 @@ enum InstructionFlags : u64
   FL_OUT_AD = FL_OUT_A | FL_OUT_D,
   FL_TIMER = (1ull << 15),            // Used only for mftb.
   FL_CHECKEXCEPTIONS = (1ull << 16),  // Used with rfi/rfid.
-  FL_EVIL =
-      (1ull << 17),  // Historically used to refer to instructions that messed up Super Monkey Ball.
-  FL_USE_FPU = (1ull << 18),     // Used to indicate a floating point instruction.
-  FL_LOADSTORE = (1ull << 19),   // Used to indicate a load/store instruction.
-  FL_SET_FPRF = (1ull << 20),    // Sets bits in the FPRF.
-  FL_READ_FPRF = (1ull << 21),   // Reads bits from the FPRF.
-  FL_SET_OE = (1ull << 22),      // Sets the overflow flag.
-  FL_IN_FLOAT_A = (1ull << 23),  // frA is used as an input.
-  FL_IN_FLOAT_B = (1ull << 24),  // frB is used as an input.
-  FL_IN_FLOAT_C = (1ull << 25),  // frC is used as an input.
-  FL_IN_FLOAT_S = (1ull << 26),  // frS is used as an input.
-  FL_IN_FLOAT_D = (1ull << 27),  // frD is used as an input.
+  FL_NO_REORDER = (1ull << 17),       // Instruction should not be reordered by our optimizations.
+  FL_USE_FPU = (1ull << 18),          // Used to indicate a floating point instruction.
+  FL_LOADSTORE = (1ull << 19),        // Used to indicate a load/store instruction.
+  FL_SET_FPRF = (1ull << 20),         // Sets bits in the FPRF.
+  FL_READ_FPRF = (1ull << 21),        // Reads bits from the FPRF.
+  FL_SET_OE = (1ull << 22),           // Sets the overflow flag.
+  FL_IN_FLOAT_A = (1ull << 23),       // frA is used as an input.
+  FL_IN_FLOAT_B = (1ull << 24),       // frB is used as an input.
+  FL_IN_FLOAT_C = (1ull << 25),       // frC is used as an input.
+  FL_IN_FLOAT_S = (1ull << 26),       // frS is used as an input.
+  FL_IN_FLOAT_D = (1ull << 27),       // frD is used as an input.
   FL_IN_FLOAT_AB = FL_IN_FLOAT_A | FL_IN_FLOAT_B,
   FL_IN_FLOAT_AC = FL_IN_FLOAT_A | FL_IN_FLOAT_C,
   FL_IN_FLOAT_ABC = FL_IN_FLOAT_A | FL_IN_FLOAT_B | FL_IN_FLOAT_C,