From 9c759573199292aacfcc33adea2d2ca1d44605be Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 6 Nov 2021 19:16:02 +0000 Subject: [PATCH] JitArm64_FloatingPoint: Implement fctiwx in ARM64 JIT We implement this by first rounding to nearest integer using the current rounding mode, then converting this value from floating point to an integral value. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_FloatingPoint.cpp | 28 +++++++++++++++++-- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 18 ++++++------ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index bc831360c1..de300ab87d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -144,7 +144,7 @@ public: void fselx(UGeckoInstruction inst); void fcmpX(UGeckoInstruction inst); void frspx(UGeckoInstruction inst); - void fctiwzx(UGeckoInstruction inst); + void fctiwx(UGeckoInstruction inst); void fresx(UGeckoInstruction inst); void frsqrtex(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index c4d63802a9..5f941958a8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -507,7 +507,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) FloatCompare(inst); } -void JitArm64::fctiwzx(UGeckoInstruction inst) +void JitArm64::fctiwx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); @@ -518,19 +518,32 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) const u32 d = inst.FD; const bool single = fpr.IsSingle(b, true); + const bool is_fctiwzx = inst.SUBOP10 == 15; const ARM64Reg VB = fpr.R(b, single ? 
RegType::LowerPairSingle : RegType::LowerPair); const ARM64Reg VD = fpr.RW(d, RegType::LowerPair); + // TODO: The upper 32 bits of the result are set to 0xfff80000, except for -0.0 where should be + // set to 0xfff80001 (TODO). + if (single) { const ARM64Reg V0 = fpr.GetReg(); + if (is_fctiwzx) + { + m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z); + } + else + { + m_float_emit.FRINTI(EncodeRegToSingle(VD), EncodeRegToSingle(VB)); + m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z); + } + // Generate 0xFFF8'0000'0000'0000ULL m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL); m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); - m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z); m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); fpr.Unlock(V0); @@ -539,7 +552,16 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) { const ARM64Reg WA = gpr.GetReg(); - m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z); + if (is_fctiwzx) + { + m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z); + } + else + { + m_float_emit.FRINTI(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); + m_float_emit.FCVTS(WA, EncodeRegToDouble(VD), RoundingMode::Z); + } + ORR(EncodeRegTo64(WA), EncodeRegTo64(WA), LogicalImm(0xFFF8'0000'0000'0000ULL, 64)); m_float_emit.FMOV(EncodeRegToDouble(VD), EncodeRegTo64(WA)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 5300a56054..ef68cbe1b1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -304,15 +304,15 @@ constexpr std::array table59{{ }}; constexpr std::array table63{{ - {264, &JitArm64::fp_logic}, // fabsx - {32, &JitArm64::fcmpX}, // fcmpo - {0, &JitArm64::fcmpX}, // fcmpu - {14, &JitArm64::FallBackToInterpreter}, // fctiwx - 
{15, &JitArm64::fctiwzx}, // fctiwzx - {72, &JitArm64::fp_logic}, // fmrx - {136, &JitArm64::fp_logic}, // fnabsx - {40, &JitArm64::fp_logic}, // fnegx - {12, &JitArm64::frspx}, // frspx + {264, &JitArm64::fp_logic}, // fabsx + {32, &JitArm64::fcmpX}, // fcmpo + {0, &JitArm64::fcmpX}, // fcmpu + {14, &JitArm64::fctiwx}, // fctiwx + {15, &JitArm64::fctiwx}, // fctiwzx + {72, &JitArm64::fp_logic}, // fmrx + {136, &JitArm64::fp_logic}, // fnabsx + {40, &JitArm64::fp_logic}, // fnegx + {12, &JitArm64::frspx}, // frspx {64, &JitArm64::mcrfs}, // mcrfs {583, &JitArm64::mffsx}, // mffsx