From afc3d30f5ce270d64cdc4c7a9151b9fb1590b818 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Mon, 29 Jun 2015 19:00:22 -0500
Subject: [PATCH 1/2] [AArch64] Implement BFI & UBFIZ in the emitter.

Also fixes a bug in the UBFX instruction emitter. Naughty Naughty PPSSPP,
not testing emitter functions you add.
---
 Source/Core/Common/Arm64Emitter.cpp | 15 +++++++++++++++
 Source/Core/Common/Arm64Emitter.h   |  4 +++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 600b424d51..c31583e5fc 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1523,6 +1523,21 @@ void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
 {
 	EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms);
 }
+
+void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
+{
+	u32 size = Is64Bit(Rn) ? 64 : 32;
+	_assert_msg_(DYNA_REC, (lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",
+	             __FUNCTION__, lsb, width);
+	EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);
+}
+void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
+{
+	u32 size = Is64Bit(Rn) ? 64 : 32;
+	_assert_msg_(DYNA_REC, (lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",
+	             __FUNCTION__, lsb, width);
+	EncodeBitfieldMOVInst(2, Rd, Rn, (size - lsb) % size, width - 1);
+}
 void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift)
 {
 	bool sf = Is64Bit(Rd);
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index d4d95ba78d..6db19dc890 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -578,6 +578,8 @@ public:
 	void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
 	void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
 	void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
+	void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
+	void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
 	// Extract register (ROR with two inputs, if same then faster on A67)
 	void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);
 
@@ -591,7 +593,7 @@ public:
 
 	void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width)
 	{
-		UBFM(Rd, Rn, lsb, lsb + width <= (Is64Bit(Rn) ? 64 : 32));
+		UBFM(Rd, Rn, lsb, lsb + width - 1);
 	}
 
 	// Load Register (Literal)

From 2cddaa09f06329971fc3ec3e4a7f76d2a88058fe Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Mon, 29 Jun 2015 19:02:30 -0500
Subject: [PATCH 2/2] [AArch64] Implement rlwimix.
---
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  1 +
 .../PowerPC/JitArm64/JitArm64_Integer.cpp     | 76 +++++++++++++++++++
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp |  2 +-
 3 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 639892e441..aa9f43add3 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -98,6 +98,7 @@ public:
 	void subfx(UGeckoInstruction inst);
 	void addcx(UGeckoInstruction inst);
 	void slwx(UGeckoInstruction inst);
+	void rlwimix(UGeckoInstruction inst);
 
 	// System Registers
 	void mtmsr(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index 86d85cedb8..542b636b30 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -766,3 +766,79 @@ void JitArm64::slwx(UGeckoInstruction inst)
 		ComputeRC(gpr.R(a), 0);
 	}
 }
+
+void JitArm64::rlwimix(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+
+	int a = inst.RA, s = inst.RS;
+	u32 mask = Helper_Mask(inst.MB, inst.ME);
+
+	if (gpr.IsImm(a) && gpr.IsImm(s))
+	{
+		u32 res = (gpr.GetImm(a) & ~mask) | (_rotl(gpr.GetImm(s), inst.SH) & mask);
+		gpr.SetImmediate(a, res);
+		if (inst.Rc)
+			ComputeRC(res, 0);
+	}
+	else
+	{
+		if (mask == 0 || (a == s && inst.SH == 0))
+		{
+			// Do Nothing
+		}
+		else if (mask == 0xFFFFFFFF)
+		{
+			if (inst.SH || a != s)
+				gpr.BindToRegister(a, a == s);
+
+			if (inst.SH)
+				ROR(gpr.R(a), gpr.R(s), 32 - inst.SH);
+			else if (a != s)
+				MOV(gpr.R(a), gpr.R(s));
+		}
+		else if (inst.SH == 0 && inst.MB <= inst.ME)
+		{
+			// No rotation
+			// No mask inversion
+			u32 lsb = 31 - inst.ME;
+			u32 width = inst.ME - inst.MB + 1;
+
+			gpr.BindToRegister(a, true);
+			ARM64Reg WA = gpr.GetReg();
+			UBFX(WA, gpr.R(s), lsb, width);
+			BFI(gpr.R(a), WA, lsb, width);
+			gpr.Unlock(WA);
+		}
+		else if (inst.SH && inst.MB <= inst.ME)
+		{
+			// No mask inversion
+			u32 lsb = 31 - inst.ME;
+			u32 width = inst.ME - inst.MB + 1;
+
+			gpr.BindToRegister(a, true);
+			ARM64Reg WA = gpr.GetReg();
+			ROR(WA, gpr.R(s), 32 - inst.SH);
+			UBFX(WA, WA, lsb, width);
+			BFI(gpr.R(a), WA, lsb, width);
+			gpr.Unlock(WA);
+		}
+		else
+		{
+			gpr.BindToRegister(a, true);
+			ARM64Reg WA = gpr.GetReg();
+			ARM64Reg WB = gpr.GetReg();
+
+			MOVI2R(WA, mask);
+			BIC(WB, gpr.R(a), WA);
+			AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ST_ROR, (32 - inst.SH) & 0x1F));
+			ORR(gpr.R(a), WB, WA);
+
+			gpr.Unlock(WA, WB);
+		}
+
+		if (inst.Rc)
+			ComputeRC(gpr.R(a), 0);
+	}
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index b25eb6ec50..fe3bb519a4 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -52,7 +52,7 @@ static GekkoOPTemplate primarytable[] =
 	{14, &JitArm64::arith_imm},             // addi
 	{15, &JitArm64::arith_imm},             // addis
 
-	{20, &JitArm64::FallBackToInterpreter}, // rlwimix
+	{20, &JitArm64::rlwimix},               // rlwimix
 	{21, &JitArm64::rlwinmx},               // rlwinmx
 	{23, &JitArm64::FallBackToInterpreter}, // rlwnmx