From e9ade0abe13349143897b6e4d9ce5b2e4211d224 Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 14 Aug 2015 14:46:04 +0200 Subject: [PATCH 1/2] JitArm64: implement crXXX --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + .../JitArm64/JitArm64_SystemRegisters.cpp | 211 ++++++++++++++++++ .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 16 +- 3 files changed, 220 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 3a6ad6b842..91b3b91eb0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -112,6 +112,7 @@ public: void mfspr(UGeckoInstruction inst); void mftb(UGeckoInstruction inst); void mtspr(UGeckoInstruction inst); + void crXXX(UGeckoInstruction inst); // LoadStore void lXX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 058a9f1d9d..7e9b673b69 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -373,3 +373,214 @@ void JitArm64::mtspr(UGeckoInstruction inst) ARM64Reg RD = gpr.R(inst.RD); STR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4); } + +void JitArm64::crXXX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITSystemRegistersOff); + + FALLBACK_IF(1); + + // Special case: crclr + if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) + { + // Clear CR field bit + int field = inst.CRBD >> 2; + int bit = 3 - (inst.CRBD & 3); + + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + switch (bit) + { + case CR_SO_BIT: + AND(XA, XA, 61, 62); // XA & ~(1<<61) + break; + + case CR_EQ_BIT: + ORR(XA, XA, 1, 0); // XA | 1<<0 + break; + + case CR_GT_BIT: + ORR(XA, XA, 63, 0); // XA | 1<<63 + break; + + case CR_LT_BIT: + AND(XA, XA, 62, 62); // XA & ~(1<<62) + break; + } + STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + gpr.Unlock(WA); + return; + } + + // Special case: crset + if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) + { + // SetCRFieldBit + int field = inst.CRBD >> 2; + int bit = 3 - (inst.CRBD & 3); + + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + + if (bit != CR_GT_BIT) + { + ANDS(ZR, XA, XA); + FixupBranch dont_clear_gt = B(CC_NEQ); + ORR(XA, XA, 63, 0); // XA | 1<<63 + SetJumpTarget(dont_clear_gt); + } + + switch (bit) + { + case CR_SO_BIT: + ORR(XA, XA, 61, 0); // XA | 1<<61 + break; + + case CR_EQ_BIT: + LSR(XA, XA, 32); + LSL(XA, XA, 32); + break; + + case CR_GT_BIT: + AND(XA, XA, 63, 62); // XA & ~(1<<63) + break; + + case CR_LT_BIT: + ORR(XA, XA, 62, 0); // XA | 1<<62 + break; + } + + ORR(XA, XA, 32, 0); // XA | 1<<32 + + STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + gpr.Unlock(WA); + return; + } + + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + ARM64Reg WB = gpr.GetReg(); + ARM64Reg XB = EncodeRegTo64(WB); + + // creqv or crnand or crnor + bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; + // crandc or crorc or crnand or crnor + bool negateB = inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; + + // GetCRFieldBit + for (int i = 0; i < 2; i++) + { + int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2; + int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3); + ARM64Reg out = i ? XB : XA; + bool negate = i ? negateB : negateA; + + ARM64Reg WC = gpr.GetReg(); + ARM64Reg XC = EncodeRegTo64(WC); + LDR(INDEX_UNSIGNED, XC, X29, PPCSTATE_OFF(cr_val) + 8 * field); + switch (bit) + { + case CR_SO_BIT: // check bit 61 set + ANDS(ZR, XC, 61, 62); // XC & ~(1<<61) + CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + break; + + case CR_EQ_BIT: // check bits 31-0 == 0 + ANDS(ZR, WC, WC); + CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + break; + + case CR_GT_BIT: // check val > 0 + ANDS(ZR, XC, XC); + CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + break; + + case CR_LT_BIT: // check bit 62 set + ANDS(ZR, XC, 62, 62); // XC & ~(1<<62) + CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + break; + + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + gpr.Unlock(WC); + } + + + // Compute combined bit + switch (inst.SUBOP10) + { + case 33: // crnor: ~(A || B) == (~A && ~B) + case 129: // crandc: A && ~B + case 257: // crand: A && B + AND(XA, XA, XB); + break; + + case 193: // crxor: A ^ B + case 289: // creqv: ~(A ^ B) = ~A ^ B + EOR(XA, XA, XB); + break; + + case 225: // crnand: ~(A && B) == (~A || ~B) + case 417: // crorc: A || ~B + case 449: // cror: A || B + ORR(XA, XA, XB); + break; + } + AND(XA, XA, 0, 63-8); // A & 0xff + + // Store result bit in CRBD + int field = inst.CRBD >> 2; + int bit = 3 - (inst.CRBD & 3); + + LDR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field); + + // Gross but necessary; if the input is totally zero and we set SO or LT, + // or even just add the (1<<32), GT will suddenly end up set without us + // intending to. This can break actual games, so fix it up. + if (bit != CR_GT_BIT) + { + ANDS(ZR, XB, XB); + FixupBranch dont_clear_gt = B(CC_NEQ); + ORR(XB, XB, 63, 0); // XA | 1<<63 + SetJumpTarget(dont_clear_gt); + } + + switch (bit) + { + case CR_SO_BIT: // set bit 61 to input + AND(XB, XB, 61, 62); // XB & ~(1<<61) + LSL(XA, XA, 61); + ORR(XB, XB, XA); + break; + + case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input + LSR(XB, XB, 32); + LSL(XB, XB, 32); + EOR(XA, XA, 1, 0); // XA ^ 1<<0 + ORR(XB, XB, XA); + break; + + case CR_GT_BIT: // set bit 63 to !input + AND(XB, XB, 63, 62); // XB & ~(1<<63) + NEG(XA, XA); + LSL(XA, XA, 63); + ORR(XB, XB, XA); + break; + + case CR_LT_BIT: // set bit 62 to input + AND(XB, XB, 62, 62); // XB & ~(1<<62) + LSL(XA, XA, 62); + ORR(XB, XB, XA); + break; + } + + ORR(XB, XB, 32, 0); // XB | 1<<32 + STR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field); + + gpr.Unlock(WA); + gpr.Unlock(WB); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 048bad5eaf..3d96a9db92 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -152,14 +152,14 @@ static GekkoOPTemplate table19[] = { {528, &JitArm64::bcctrx}, // bcctrx {16, &JitArm64::bclrx}, // bclrx - {257, &JitArm64::FallBackToInterpreter}, // crand - {129, &JitArm64::FallBackToInterpreter}, // crandc - {289, &JitArm64::FallBackToInterpreter}, // creqv - {225, &JitArm64::FallBackToInterpreter}, // crnand - {33, &JitArm64::FallBackToInterpreter}, // crnor - {449, &JitArm64::FallBackToInterpreter}, // cror - {417, &JitArm64::FallBackToInterpreter}, // crorc - {193, &JitArm64::FallBackToInterpreter}, // crxor + {257, &JitArm64::crXXX}, // crand + {129, &JitArm64::crXXX}, // crandc + {289, &JitArm64::crXXX}, // creqv + {225, &JitArm64::crXXX}, // crnand + {33, &JitArm64::crXXX}, // crnor + {449, &JitArm64::crXXX}, // cror + {417, &JitArm64::crXXX}, // crorc + {193, &JitArm64::crXXX}, // crxor {150, &JitArm64::DoNothing}, // isync {0, &JitArm64::mcrf}, // mcrf From d74eb0ea586489ee70fd311eeab9fb3b3ec76658 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 21 Aug 2015 22:44:39 -0500 Subject: [PATCH 2/2] [AArch64] Fix the bugs in the cr instructions Makes it a bit more efficient in the process. --- .../JitArm64/JitArm64_SystemRegisters.cpp | 81 +++++++++---------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 7e9b673b69..0c79ac16a9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -379,8 +379,6 @@ void JitArm64::crXXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - FALLBACK_IF(1); - // Special case: crclr if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) { @@ -394,19 +392,19 @@ void JitArm64::crXXX(UGeckoInstruction inst) switch (bit) { case CR_SO_BIT: - AND(XA, XA, 61, 62); // XA & ~(1<<61) + AND(XA, XA, 64 - 62, 62, true); // XA & ~(1<<61) break; case CR_EQ_BIT: - ORR(XA, XA, 1, 0); // XA | 1<<0 + ORR(XA, XA, 0, 0, true); // XA | 1<<0 break; case CR_GT_BIT: - ORR(XA, XA, 63, 0); // XA | 1<<63 + ORR(XA, XA, 64 - 63, 0, true); // XA | 1<<63 break; case CR_LT_BIT: - AND(XA, XA, 62, 62); // XA & ~(1<<62) + AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62) break; } STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); @@ -427,33 +425,34 @@ void JitArm64::crXXX(UGeckoInstruction inst) if (bit != CR_GT_BIT) { - ANDS(ZR, XA, XA); - FixupBranch dont_clear_gt = B(CC_NEQ); - ORR(XA, XA, 63, 0); // XA | 1<<63 - SetJumpTarget(dont_clear_gt); + ARM64Reg WB = gpr.GetReg(); + ARM64Reg XB = EncodeRegTo64(WB); + ORR(XB, XA, 64 - 63, 0, true); // XA | 1<<63 + CMP(XA, ZR); + CSEL(XA, XA, XB, CC_NEQ); + gpr.Unlock(WB); } switch (bit) { case CR_SO_BIT: - ORR(XA, XA, 61, 0); // XA | 1<<61 + ORR(XA, XA, 64 - 61, 0, true); // XA | 1<<61 break; case CR_EQ_BIT: - LSR(XA, XA, 32); - LSL(XA, XA, 32); + AND(XA, XA, 32, 31, true); // Clear lower 32bits break; case CR_GT_BIT: - AND(XA, XA, 63, 62); // XA & ~(1<<63) + AND(XA, XA, 0, 62, true); // XA & ~(1<<63) break; case CR_LT_BIT: - ORR(XA, XA, 62, 0); // XA | 1<<62 + ORR(XA, XA, 64 - 62, 0, true); // XA | 1<<62 break; } - ORR(XA, XA, 32, 0); // XA | 1<<32 + ORR(XA, XA, 32, 0, true); // XA | 1<<32 STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); gpr.Unlock(WA); @@ -484,23 +483,25 @@ void JitArm64::crXXX(UGeckoInstruction inst) switch (bit) { case CR_SO_BIT: // check bit 61 set - ANDS(ZR, XC, 61, 62); // XC & ~(1<<61) - CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + UBFX(out, XC, 61, 1); + if (negate) + EOR(out, out, 0, 0, true); // XC ^ 1 break; case CR_EQ_BIT: // check bits 31-0 == 0 - ANDS(ZR, WC, WC); - CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + CMP(WC, WZR); + CSET(out, negate ? CC_NEQ : CC_EQ); break; case CR_GT_BIT: // check val > 0 - ANDS(ZR, XC, XC); - CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + CMP(XC, ZR); + CSET(out, negate ? CC_LE : CC_GT); break; case CR_LT_BIT: // check bit 62 set - ANDS(ZR, XC, 62, 62); // XC & ~(1<<62) - CSINC(out, ZR, ZR, negate ? CC_NEQ : CC_EQ); + UBFX(out, XC, 62, 1); + if (negate) + EOR(out, out, 0, 0, true); // XC ^ 1 break; default: @@ -530,7 +531,6 @@ void JitArm64::crXXX(UGeckoInstruction inst) ORR(XA, XA, XB); break; } - AND(XA, XA, 0, 63-8); // A & 0xff // Store result bit in CRBD int field = inst.CRBD >> 2; @@ -543,42 +543,37 @@ void JitArm64::crXXX(UGeckoInstruction inst) // intending to. This can break actual games, so fix it up. if (bit != CR_GT_BIT) { - ANDS(ZR, XB, XB); - FixupBranch dont_clear_gt = B(CC_NEQ); - ORR(XB, XB, 63, 0); // XA | 1<<63 - SetJumpTarget(dont_clear_gt); + ARM64Reg WC = gpr.GetReg(); + ARM64Reg XC = EncodeRegTo64(WC); + ORR(XC, XB, 64 - 63, 0, true); // XB | 1<<63 + CMP(XB, ZR); + CSEL(XB, XB, XC, CC_NEQ); + gpr.Unlock(WC); } switch (bit) { case CR_SO_BIT: // set bit 61 to input - AND(XB, XB, 61, 62); // XB & ~(1<<61) - LSL(XA, XA, 61); - ORR(XB, XB, XA); + BFI(XB, XA, 61, 1); break; case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - LSR(XB, XB, 32); - LSL(XB, XB, 32); - EOR(XA, XA, 1, 0); // XA ^ 1<<0 + AND(XB, XB, 32, 31, true); // Clear lower 32bits + EOR(XA, XA, 0, 0); // XA ^ 1<<0 ORR(XB, XB, XA); break; case CR_GT_BIT: // set bit 63 to !input - AND(XB, XB, 63, 62); // XB & ~(1<<63) - NEG(XA, XA); - LSL(XA, XA, 63); - ORR(XB, XB, XA); + EOR(XA, XA, 0, 0); // XA ^ 1<<0 + BFI(XB, XA, 63, 1); break; case CR_LT_BIT: // set bit 62 to input - AND(XB, XB, 62, 62); // XB & ~(1<<62) - LSL(XA, XA, 62); - ORR(XB, XB, XA); + BFI(XB, XA, 62, 1); break; } - ORR(XB, XB, 32, 0); // XB | 1<<32 + ORR(XA, XA, 32, 0, true); // XA | 1<<32 STR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field); gpr.Unlock(WA);