diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index a1ee543baf..81087588cc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -119,6 +119,8 @@ public: void mftb(UGeckoInstruction inst); void mtspr(UGeckoInstruction inst); void crXXX(UGeckoInstruction inst); + void mfcr(UGeckoInstruction inst); + void mtcrf(UGeckoInstruction inst); // LoadStore void lXX(UGeckoInstruction inst); @@ -154,6 +156,8 @@ public: void fcmpx(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); void fctiwzx(UGeckoInstruction inst); + void fdivx(UGeckoInstruction inst); + void fdivsx(UGeckoInstruction inst); // Paired void ps_abs(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index c6c00fad3c..a02948534a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -23,7 +23,7 @@ void JitArm64::fabsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == b); ARM64Reg VB = fpr.R(b); ARM64Reg VD = fpr.R(d); @@ -64,7 +64,7 @@ void JitArm64::faddx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == a || d == b); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -111,7 +111,7 @@ void JitArm64::fmaddx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -138,7 +138,7 @@ void JitArm64::fmrx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == b); ARM64Reg VB = 
fpr.R(b); ARM64Reg VD = fpr.R(d); @@ -174,7 +174,7 @@ void JitArm64::fmsubx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -218,7 +218,7 @@ void JitArm64::fmulx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == a || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VC = fpr.R(c); @@ -244,7 +244,7 @@ void JitArm64::fnabsx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == b); ARM64Reg VB = fpr.R(b); ARM64Reg VD = fpr.R(d); @@ -271,7 +271,7 @@ void JitArm64::fnegx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 b = inst.FB, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == b); ARM64Reg VB = fpr.R(b); ARM64Reg VD = fpr.R(d); @@ -318,7 +318,7 @@ void JitArm64::fnmaddx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -367,7 +367,7 @@ void JitArm64::fnmsubx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -394,7 +394,7 @@ void JitArm64::fselx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, true); + fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VD = fpr.R(d); ARM64Reg VA = fpr.R(a); @@ -439,7 +439,7 @@ void JitArm64::fsubx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, d = inst.FD; - 
fpr.BindToRegister(d, true);
+  fpr.BindToRegister(d, d == a || d == b);

   ARM64Reg VA = fpr.R(a);
   ARM64Reg VB = fpr.R(b);
@@ -593,3 +593,46 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
   }
   fpr.Unlock(V0);
 }
+
+void JitArm64::fdivx(UGeckoInstruction inst)  // JIT handler for fdivx: emits FDIV with FA and FB as sources, FD as destination
+{
+  INSTRUCTION_START
+  JITDISABLE(bJITFloatingPointOff);
+  FALLBACK_IF(inst.Rc);  // presumably hands the Rc=1 form to the fallback path - macro not visible here, confirm
+
+  u32 a = inst.FA, b = inst.FB, d = inst.FD;
+  fpr.BindToRegister(d, d == a || d == b);  // second argument is true only when d is also a source operand
+
+  ARM64Reg VA = fpr.R(a);
+  ARM64Reg VB = fpr.R(b);
+  ARM64Reg VD = fpr.R(d);
+
+  if (fpr.IsLower(d))  // NOTE(review): presumably "only the low 64-bit element of d is tracked" - confirm in the FPR cache
+  {
+    m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));  // divide straight into VD
+  }
+  else
+  {
+    ARM64Reg V0 = fpr.GetReg();  // scratch register that receives the quotient
+    m_float_emit.FDIV(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
+    m_float_emit.INS(64, VD, 0, V0, 0);  // insert 64-bit element 0 of V0 into element 0 of VD
+    fpr.Unlock(V0);
+  }
+}
+
+void JitArm64::fdivsx(UGeckoInstruction inst)  // JIT handler for fdivsx: emits FDIV, then copies the result into both 64-bit elements of FD
+{
+  INSTRUCTION_START
+  JITDISABLE(bJITFloatingPointOff);
+  FALLBACK_IF(inst.Rc);  // presumably hands the Rc=1 form to the fallback path - macro not visible here, confirm
+
+  u32 a = inst.FA, b = inst.FB, d = inst.FD;
+  fpr.BindToRegister(d, d == a || d == b, false);  // same aliasing test as fdivx, plus an explicit `false` third argument
+
+  ARM64Reg VA = fpr.R(a);
+  ARM64Reg VB = fpr.R(b);
+  ARM64Reg VD = fpr.R(d, false);  // NOTE(review): meaning of the `false` flag is not visible here - confirm against the FPR cache
+
+  m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));  // NOTE(review): divides on the double view; no round-to-single step is visible here - confirm
+  m_float_emit.INS(64, VD, 1, VD, 0);  // duplicate 64-bit element 0 of VD into element 1
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
index 0c79ac16a9..2293b0f012 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
@@ -579,3 +579,56 @@ void JitArm64::crXXX(UGeckoInstruction inst)
   gpr.Unlock(WA);
   gpr.Unlock(WB);
 }
+
+void JitArm64::mfcr(UGeckoInstruction inst)  // JIT handler for mfcr: calls the asm_routines.mfcr routine and moves W0 into RD
+{
+  INSTRUCTION_START
+  JITDISABLE(bJITSystemRegistersOff);
+
+  gpr.Lock(W0, W1, W2, W30);  // registers used across the call: W0 carries the value copied below, W30 is written by BLR
+  MOVI2R(X0, (u64)asm_routines.mfcr);  // X0 = address of the routine
+  BLR(X0);  // branch-with-link to it
+  gpr.Unlock(W1, W2, W30);  // W0 stays locked until its value has been copied out
+
+  gpr.BindToRegister(inst.RD, false);
+  MOV(gpr.R(inst.RD), W0);  // RD = value left in W0 by the call
+
+  gpr.Unlock(W0);
+}
+
+void JitArm64::mtcrf(UGeckoInstruction inst)  // JIT handler for mtcrf: for each field selected by CRM, stores the m_crTable entry picked by that field's four bits of RS
+{
+  INSTRUCTION_START
+  JITDISABLE(bJITSystemRegistersOff);
+
+  u32 crm = inst.CRM;
+  if (crm != 0)  // the table loads/stores below are only emitted when at least one field is selected
+  {
+    ARM64Reg RS = gpr.R(inst.RS);
+    ARM64Reg WA = gpr.GetReg();  // scratch: field bits, then the loaded table entry (via XA)
+    ARM64Reg XA = EncodeRegTo64(WA);
+    ARM64Reg WB = gpr.GetReg();  // scratch: table base (via XB)
+    ARM64Reg XB = EncodeRegTo64(WB);
+    MOVI2R(XB, (u64)m_crTable);  // XB = address of m_crTable
+    for (int i = 0; i < 8; ++i)
+    {
+      if ((crm & (0x80 >> i)) != 0)  // mask bit 0x80 selects field 0, 0x01 selects field 7
+      {
+        if (i != 7)
+          LSR(WA, RS, 28 - i * 4);  // shift field i down so it sits in bits 3..0
+        if (i != 0)  // for i == 0 the 32-bit shift by 28 already leaves only four bits
+        {
+          if (i != 7)
+            UBFX(WA, WA, 0, 4);  // keep only the low four bits
+          else
+            UBFX(WA, RS, 0, 4);  // field 7 needs no shift: take the low four bits of RS directly
+        }
+
+        LDR(XA, XB, ArithOption(XA, true));  // load the table entry indexed by the four bits (presumably a scaled index - confirm ArithOption)
+        STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * i);  // store it 8 * i bytes past the cr_val offset from X29
+      }
+    }
+    gpr.Unlock(WA, WB);
+  }
+}
+
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index 84cef8bf70..a3383f5678 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -286,9 +286,9 @@ static GekkoOPTemplate table31[] =
   {759, &JitArm64::stfXX}, // stfdux
   {983, &JitArm64::stfXX}, // stfiwx

-  {19, &JitArm64::FallBackToInterpreter}, // mfcr
+  {19, &JitArm64::mfcr}, // mfcr
   {83, &JitArm64::mfmsr}, // mfmsr
-  {144, &JitArm64::FallBackToInterpreter}, // mtcrf
+  {144, &JitArm64::mtcrf}, // mtcrf
   {146, &JitArm64::mtmsr}, // mtmsr
   {210, &JitArm64::mtsr}, // mtsr
   {242, &JitArm64::mtsrin}, // mtsrin
@@ -313,7 +313,7 @@ static GekkoOPTemplate table31[] =

 static GekkoOPTemplate table59[] =
 {
-  {18, &JitArm64::FallBackToInterpreter}, // fdivsx
+  {18, &JitArm64::fdivsx}, // fdivsx
   {20, &JitArm64::fsubsx}, // fsubsx
   {21, &JitArm64::faddsx}, // faddsx
   {24, &JitArm64::FallBackToInterpreter}, // fresx
@@ -346,7 +346,7 @@ static GekkoOPTemplate table63[] =

 static GekkoOPTemplate table63_2[] =
 {
-  {18, &JitArm64::FallBackToInterpreter}, // fdivx
+  {18, &JitArm64::fdivx}, // fdivx
   {20, &JitArm64::fsubx}, // fsubx
   {21, &JitArm64::faddx}, // faddx
   {23, &JitArm64::fselx}, // fselx
diff --git 
a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index e6abefbd98..18274ff78f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -569,4 +569,47 @@ void JitArm64AsmRoutineManager::GenerateCommon()
   pairedStoreQuantized[30] = storeSingleS8Slow;
   pairedStoreQuantized[31] = storeSingleS16Slow;

+  mfcr = AlignCode16();  // presumably the aligned address at which GenMfcr() starts emitting - confirm AlignCode16
+  GenMfcr();
+}
+
+void JitArm64AsmRoutineManager::GenMfcr()  // Emits the routine that packs the eight cr_val entries into eight 4-bit fields of W0
+{
+  // Input: Nothing
+  // Returns: W0 (the entry loaded first, i == 0, ends up in bits 31..28; i == 7 in bits 3..0)
+  // Clobbers: X1, X2
+  const u8* start = GetCodePtr();
+  for (int i = 0; i < 8; i++)
+  {
+    LDR(INDEX_UNSIGNED, X1, X29, PPCSTATE_OFF(cr_val) + 8 * i);  // X1 = 64-bit entry i
+
+    // SO: bit 61 of the entry becomes bit 0 of this field
+    if (i == 0)
+    {
+      UBFX(X0, X1, 61, 1);  // first field: this also initialises X0
+    }
+    else
+    {
+      ORR(W0, WZR, W0, ArithOption(W0, ST_LSL, 4));  // W0 <<= 4, making room for this field
+      UBFX(X2, X1, 61, 1);
+      ORR(X0, X0, X2);
+    }
+
+    // EQ: bit 1 is set when the low 32 bits of the entry are zero
+    ORR(W2, W0, 32 - 1, 0); // W0 | 1<<1
+    CMP(W1, WZR);
+    CSEL(W0, W2, W0, CC_EQ);
+
+    // GT: bit 2 is set when the full 64-bit entry compares greater than zero
+    ORR(W2, W0, 32 - 2, 0); // W0 | 1<<2
+    CMP(X1, ZR);
+    CSEL(W0, W2, W0, CC_GT);
+
+    // LT: bit 62 of the entry becomes bit 3 of this field
+    UBFX(X2, X1, 62, 1);
+    ORR(W0, W0, W2, ArithOption(W2, ST_LSL, 3));
+  }
+
+  RET(X30);
+  JitRegister::Register(start, GetCodePtr(), "JIT_Mfcr");  // registers the emitted range [start, current pointer) under this name
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.h b/Source/Core/Core/PowerPC/JitArm64/JitAsm.h
index daa09d0bd2..5b4697ba6c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.h
@@ -12,6 +12,7 @@ class JitArm64AsmRoutineManager : public CommonAsmRoutinesBase, public Arm64Gen:
 private:
   void Generate();
   void GenerateCommon();
+  void GenMfcr();  // emits the mfcr helper routine (defined in JitAsm.cpp)

 public:
   void Init()