From 7c2b09e1567e31b1905c17c25d15f61606813668 Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 6 Nov 2021 19:15:26 +0000 Subject: [PATCH 1/2] Arm64Emitter: Add FRINTI instruction --- Source/Core/Common/Arm64Emitter.cpp | 4 ++++ Source/Core/Common/Arm64Emitter.h | 1 + 2 files changed, 5 insertions(+) diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index c0d19f652f..e2d8bbf4c0 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -2913,6 +2913,10 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn) { EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn); } +void ARM64FloatEmitter::FRINTI(ARM64Reg Rd, ARM64Reg Rn) +{ + EmitScalar1Source(0, 0, IsDouble(Rd), 15, Rd, Rn); +} void ARM64FloatEmitter::FRECPE(ARM64Reg Rd, ARM64Reg Rn) { diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index c609757bd1..77927c6aac 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -1230,6 +1230,7 @@ public: void FABS(ARM64Reg Rd, ARM64Reg Rn); void FNEG(ARM64Reg Rd, ARM64Reg Rn); void FSQRT(ARM64Reg Rd, ARM64Reg Rn); + void FRINTI(ARM64Reg Rd, ARM64Reg Rn); void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP void FRECPE(ARM64Reg Rd, ARM64Reg Rn); void FRSQRTE(ARM64Reg Rd, ARM64Reg Rn); From 9c759573199292aacfcc33adea2d2ca1d44605be Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 6 Nov 2021 19:16:02 +0000 Subject: [PATCH 2/2] JitArm64_FloatingPoint: Implement fctiwx in ARM64 JIT We implement this by first rounding to nearest integer using the current rounding mode, then converting this value from floating point to an integral value. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_FloatingPoint.cpp | 28 +++++++++++++++++-- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 18 ++++++------ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index bc831360c1..de300ab87d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -144,7 +144,7 @@ public: void fselx(UGeckoInstruction inst); void fcmpX(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); - void fctiwzx(UGeckoInstruction inst); + void fctiwx(UGeckoInstruction inst); void fresx(UGeckoInstruction inst); void frsqrtex(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index c4d63802a9..5f941958a8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -507,7 +507,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) FloatCompare(inst); } -void JitArm64::fctiwzx(UGeckoInstruction inst) +void JitArm64::fctiwx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); @@ -518,19 +518,32 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) const u32 d = inst.FD; const bool single = fpr.IsSingle(b, true); + const bool is_fctiwzx = inst.SUBOP10 == 15; const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair); const ARM64Reg VD = fpr.RW(d, RegType::LowerPair); + // TODO: The upper 32 bits of the result are set to 0xfff80000, except for -0.0 where they should be + // set to 0xfff80001 (TODO). 
+ if (single) { const ARM64Reg V0 = fpr.GetReg(); + if (is_fctiwzx) + { + m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z); + } + else + { + m_float_emit.FRINTI(EncodeRegToSingle(VD), EncodeRegToSingle(VB)); + m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z); + } + // Generate 0xFFF8'0000'0000'0000ULL m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL); m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); - m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z); m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); fpr.Unlock(V0); @@ -539,7 +552,16 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) { const ARM64Reg WA = gpr.GetReg(); - m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z); + if (is_fctiwzx) + { + m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z); + } + else + { + m_float_emit.FRINTI(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); + m_float_emit.FCVTS(WA, EncodeRegToDouble(VD), RoundingMode::Z); + } + ORR(EncodeRegTo64(WA), EncodeRegTo64(WA), LogicalImm(0xFFF8'0000'0000'0000ULL, 64)); m_float_emit.FMOV(EncodeRegToDouble(VD), EncodeRegTo64(WA)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 5300a56054..ef68cbe1b1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -304,15 +304,15 @@ constexpr std::array table59{{ }}; constexpr std::array table63{{ - {264, &JitArm64::fp_logic}, // fabsx - {32, &JitArm64::fcmpX}, // fcmpo - {0, &JitArm64::fcmpX}, // fcmpu - {14, &JitArm64::FallBackToInterpreter}, // fctiwx - {15, &JitArm64::fctiwzx}, // fctiwzx - {72, &JitArm64::fp_logic}, // fmrx - {136, &JitArm64::fp_logic}, // fnabsx - {40, &JitArm64::fp_logic}, // fnegx - {12, &JitArm64::frspx}, // frspx + {264, &JitArm64::fp_logic}, // fabsx + 
{32, &JitArm64::fcmpX}, // fcmpo + {0, &JitArm64::fcmpX}, // fcmpu + {14, &JitArm64::fctiwx}, // fctiwx + {15, &JitArm64::fctiwx}, // fctiwzx + {72, &JitArm64::fp_logic}, // fmrx + {136, &JitArm64::fp_logic}, // fnabsx + {40, &JitArm64::fp_logic}, // fnegx + {12, &JitArm64::frspx}, // frspx {64, &JitArm64::mcrfs}, // mcrfs {583, &JitArm64::mffsx}, // mffsx