From a9e0f21373d354cf6d9ab2882ca42a53a7002063 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Fri, 17 Sep 2021 22:59:00 +0200 Subject: [PATCH 1/3] Jit64: Split arithcx into addcx and subfcx JitArm64 also opts to separate the two. The shared logic makes less sense once we start adding more optimizations. --- Source/Core/Core/PowerPC/Jit64/Jit.h | 3 +- .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp | 8 +-- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 54 ++++++++++++------- 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 35f198dc6b..7da679b86c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -150,7 +150,7 @@ public: void DynaRunTable63(UGeckoInstruction inst); void addx(UGeckoInstruction inst); - void arithcx(UGeckoInstruction inst); + void addcx(UGeckoInstruction inst); void mulli(UGeckoInstruction inst); void mulhwXx(UGeckoInstruction inst); void mullwx(UGeckoInstruction inst); @@ -233,6 +233,7 @@ public: void subfic(UGeckoInstruction inst); void subfx(UGeckoInstruction inst); + void subfcx(UGeckoInstruction inst); void twX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 24f1398b73..35b17bcdb7 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -150,8 +150,8 @@ constexpr std::array s_table19{{ constexpr std::array s_table31{{ {266, &Jit64::addx}, // addx {778, &Jit64::addx}, // addox - {10, &Jit64::arithcx}, // addcx - {522, &Jit64::arithcx}, // addcox + {10, &Jit64::addcx}, // addcx + {522, &Jit64::addcx}, // addcox {138, &Jit64::arithXex}, // addex {650, &Jit64::arithXex}, // addeox {234, &Jit64::arithXex}, // addmex @@ -170,8 +170,8 @@ constexpr std::array s_table31{{ {616, &Jit64::negx}, // negox {40, &Jit64::subfx}, // subfx {552, &Jit64::subfx}, // subfox - {8, &Jit64::arithcx}, // subfcx - {520, &Jit64::arithcx}, // subfcox + {8, &Jit64::subfcx}, // subfcx + {520, &Jit64::subfcx}, // subfcox {136, &Jit64::arithXex}, // subfex {648, &Jit64::arithXex}, // subfeox {232, &Jit64::arithXex}, // subfmex diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index be61b048cf..d85fd1a3a9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1806,11 +1806,39 @@ void Jit64::arithXex(UGeckoInstruction inst) ComputeRC(d); } -void Jit64::arithcx(UGeckoInstruction inst) +void Jit64::addcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + int a = inst.RA, b = inst.RB, d = inst.RD; + + { + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Rb = gpr.Use(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rb, Rd); + + if (d == a) + { + ADD(32, Rd, Rb); + } + else + { + if (d != b) + MOV(32, Rd, Rb); + ADD(32, Rd, Ra); + } + } + + FinalizeCarryOverflow(inst.OE); + if (inst.Rc) + ComputeRC(d); +} + +void Jit64::subfcx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITIntegerOff); - bool add = !!(inst.SUBOP10 & 2); // add or sub int a = inst.RA, b = inst.RB, d = inst.RD; { @@ -1821,30 +1849,20 @@ void Jit64::arithcx(UGeckoInstruction inst) if (d == a && d != b) { - if (add) - { - ADD(32, Rd, Rb); - } - else - { - // special case, because sub isn't reversible - MOV(32, R(RSCRATCH), Ra); - MOV(32, Rd, Rb); - SUB(32, Rd, R(RSCRATCH)); - } + // special case, because sub isn't reversible + MOV(32, R(RSCRATCH), Ra); + MOV(32, Rd, Rb); + SUB(32, Rd, R(RSCRATCH)); } else { if (d != b) MOV(32, Rd, Rb); - if (add) - ADD(32, Rd, Ra); - else - SUB(32, Rd, Ra); + SUB(32, Rd, Ra); } } - FinalizeCarryOverflow(inst.OE, !add); + FinalizeCarryOverflow(inst.OE, true); if (inst.Rc) ComputeRC(d); } From a725eb80ffe5d6ecc66d1247ea7f003dd09bdfb0 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Mon, 20 Sep 2021 23:56:38 +0200 Subject: [PATCH 2/3] Jit64: Merge addx and addcx They are extremely similar and can share nearly all optimizations. --- Source/Core/Core/PowerPC/Jit64/Jit.h | 1 - .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp | 8 +-- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 53 +++++++------------ 3 files changed, 22 insertions(+), 40 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 7da679b86c..5916093295 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -150,7 +150,6 @@ public: void DynaRunTable63(UGeckoInstruction inst); void addx(UGeckoInstruction inst); - void addcx(UGeckoInstruction inst); void mulli(UGeckoInstruction inst); void mulhwXx(UGeckoInstruction inst); void mullwx(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 35b17bcdb7..2d77df1d38 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -150,8 +150,8 @@ constexpr std::array s_table19{{ constexpr std::array s_table31{{ {266, &Jit64::addx}, // addx {778, &Jit64::addx}, // addox - {10, &Jit64::addcx}, // addcx - {522, &Jit64::addcx}, // addcox + {10, &Jit64::addx}, // addcx + {522, &Jit64::addx}, // addcox {138, &Jit64::arithXex}, // addex {650, &Jit64::arithXex}, // addeox {234, &Jit64::arithXex}, // addmex @@ -170,8 +170,8 @@ constexpr std::array s_table31{{ {616, &Jit64::negx}, // negox {40, &Jit64::subfx}, // subfx {552, &Jit64::subfx}, // subfox - {8, &Jit64::subfcx}, // subfcx - {520, &Jit64::subfcx}, // subfcox + {8, &Jit64::subfcx}, // subfcx + {520, &Jit64::subfcx}, // subfcox {136, &Jit64::arithXex}, // subfex {648, &Jit64::arithXex}, // subfeox {232, &Jit64::arithXex}, // subfmex diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index d85fd1a3a9..c1a6846d16 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1656,19 +1656,21 @@ void Jit64::addx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; + bool carry = !(inst.SUBOP10 & (1 << 8)); if (gpr.IsImm(a, b)) { - s32 i = gpr.SImm32(a), j = gpr.SImm32(b); + const s32 i = gpr.SImm32(a), j = gpr.SImm32(b); gpr.SetImmediate32(d, i + j); + if (carry) + FinalizeCarry(Interpreter::Helper_Carry(i, j)); if (inst.OE) GenerateConstantOverflow((s64)i + (s64)j); } else if (gpr.IsImm(a) || gpr.IsImm(b)) { - auto [i, j] = gpr.IsImm(a) ? std::pair(a, b) : std::pair(b, a); - - s32 imm = gpr.SImm32(i); + const auto [i, j] = gpr.IsImm(a) ? std::pair(a, b) : std::pair(b, a); + const s32 imm = gpr.SImm32(i); RCOpArg Rj = gpr.Use(j, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Rj, Rd); @@ -1677,16 +1679,20 @@ void Jit64::addx(UGeckoInstruction inst) { if (d != j) MOV(32, Rd, Rj); + if (carry) + FinalizeCarry(false); if (inst.OE) GenerateConstantOverflow(false); } else if (d == j) { ADD(32, Rd, Imm32(imm)); + if (carry) + FinalizeCarry(CC_C); if (inst.OE) GenerateOverflow(); } - else if (Rj.IsSimpleReg() && !inst.OE) + else if (Rj.IsSimpleReg() && !carry && !inst.OE) { LEA(32, Rd, MDisp(Rj.GetSimpleReg(), imm)); } @@ -1694,6 +1700,8 @@ void Jit64::addx(UGeckoInstruction inst) { MOV(32, Rd, Rj); ADD(32, Rd, Imm32(imm)); + if (carry) + FinalizeCarry(CC_C); if (inst.OE) GenerateOverflow(); } @@ -1701,6 +1709,8 @@ void Jit64::addx(UGeckoInstruction inst) { MOV(32, Rd, Imm32(imm)); ADD(32, Rd, Rj); + if (carry) + FinalizeCarry(CC_C); if (inst.OE) GenerateOverflow(); } @@ -1717,7 +1727,7 @@ void Jit64::addx(UGeckoInstruction inst) RCOpArg& Rnotd = (d == a) ? Rb : Ra; ADD(32, Rd, Rnotd); } - else if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !inst.OE) + else if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !carry && !inst.OE) { LEA(32, Rd, MRegSum(Ra.GetSimpleReg(), Rb.GetSimpleReg())); } @@ -1726,6 +1736,8 @@ void Jit64::addx(UGeckoInstruction inst) MOV(32, Rd, Ra); ADD(32, Rd, Rb); } + if (carry) + FinalizeCarry(CC_C); if (inst.OE) GenerateOverflow(); } @@ -1806,35 +1818,6 @@ void Jit64::arithXex(UGeckoInstruction inst) ComputeRC(d); } -void Jit64::addcx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - int a = inst.RA, b = inst.RB, d = inst.RD; - - { - RCOpArg Ra = gpr.Use(a, RCMode::Read); - RCOpArg Rb = gpr.Use(b, RCMode::Read); - RCX64Reg Rd = gpr.Bind(d, RCMode::Write); - RegCache::Realize(Ra, Rb, Rd); - - if (d == a) - { - ADD(32, Rd, Rb); - } - else - { - if (d != b) - MOV(32, Rd, Rb); - ADD(32, Rd, Ra); - } - } - - FinalizeCarryOverflow(inst.OE); - if (inst.Rc) - ComputeRC(d); -} - void Jit64::subfcx(UGeckoInstruction inst) { INSTRUCTION_START From da9546cb2f7f81178412d7d5c2df7f5582636b8d Mon Sep 17 00:00:00 2001 From: Sintendo Date: Tue, 21 Sep 2021 23:09:25 +0200 Subject: [PATCH 3/3] Jit64: Merge subfx and subfcx Again, logic and optimizations are mostly the same so it makes sense. --- Source/Core/Core/PowerPC/Jit64/Jit.h | 1 - .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp | 4 +- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 59 +++++++------------ 3 files changed, 22 insertions(+), 42 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 5916093295..a0874de36b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -232,7 +232,6 @@ public: void subfic(UGeckoInstruction inst); void subfx(UGeckoInstruction inst); - void subfcx(UGeckoInstruction inst); void twX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 2d77df1d38..2a5b77b258 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -170,8 +170,8 @@ constexpr std::array s_table31{{ {616, &Jit64::negx}, // negox {40, &Jit64::subfx}, // subfx {552, &Jit64::subfx}, // subfox - {8, &Jit64::subfcx}, // subfcx - {520, &Jit64::subfcx}, // subfcox + {8, &Jit64::subfx}, // subfcx + {520, &Jit64::subfx}, // subfcox {136, &Jit64::arithXex}, // subfex {648, &Jit64::arithXex}, // subfeox {232, &Jit64::arithXex}, // subfmex diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index c1a6846d16..e5f5071bc6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -938,10 +938,13 @@ void Jit64::subfx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; + const bool carry = !(inst.SUBOP10 & (1 << 5)); if (a == b) { gpr.SetImmediate32(d, 0); + if (carry) + FinalizeCarry(true); if (inst.OE) GenerateConstantOverflow(false); } @@ -949,6 +952,8 @@ void Jit64::subfx(UGeckoInstruction inst) { s32 i = gpr.SImm32(b), j = gpr.SImm32(a); gpr.SetImmediate32(d, i - j); + if (carry) + FinalizeCarry(j == 0 || Interpreter::Helper_Carry((u32)i, 0u - (u32)j)); if (inst.OE) GenerateConstantOverflow((s64)i - (s64)j); } @@ -963,16 +968,20 @@ void Jit64::subfx(UGeckoInstruction inst) { if (d != b) MOV(32, Rd, Rb); + if (carry) + FinalizeCarry(true); if (inst.OE) GenerateConstantOverflow(false); } else if (d == b) { SUB(32, Rd, Imm32(j)); + if (carry) + FinalizeCarry(CC_NC); if (inst.OE) GenerateOverflow(); } - else if (Rb.IsSimpleReg() && !inst.OE) + else if (Rb.IsSimpleReg() && !carry && !inst.OE) { LEA(32, Rd, MDisp(Rb.GetSimpleReg(), -j)); } @@ -980,6 +989,8 @@ void Jit64::subfx(UGeckoInstruction inst) { MOV(32, Rd, Rb); SUB(32, Rd, Imm32(j)); + if (carry) + FinalizeCarry(CC_NC); if (inst.OE) GenerateOverflow(); } @@ -993,6 +1004,8 @@ void Jit64::subfx(UGeckoInstruction inst) if (d != a) MOV(32, Rd, Ra); NEG(32, Rd); + if (carry) + FinalizeCarry(CC_NC); if (inst.OE) GenerateOverflow(); } @@ -1003,21 +1016,21 @@ void Jit64::subfx(UGeckoInstruction inst) RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rb, Rd); - if (d == b) - { - SUB(32, Rd, Ra); - } - else if (d == a) + if (d == a && d != b) { + // special case, because sub isn't reversible MOV(32, R(RSCRATCH), Ra); MOV(32, Rd, Rb); SUB(32, Rd, R(RSCRATCH)); } else { - MOV(32, Rd, Rb); + if (d != b) + MOV(32, Rd, Rb); SUB(32, Rd, Ra); } + if (carry) + FinalizeCarry(CC_NC); if (inst.OE) GenerateOverflow(); } @@ -1818,38 +1831,6 @@ void Jit64::arithXex(UGeckoInstruction inst) ComputeRC(d); } -void Jit64::subfcx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - int a = inst.RA, b = inst.RB, d = inst.RD; - - { - RCOpArg Ra = gpr.Use(a, RCMode::Read); - RCOpArg Rb = gpr.Use(b, RCMode::Read); - RCX64Reg Rd = gpr.Bind(d, RCMode::Write); - RegCache::Realize(Ra, Rb, Rd); - - if (d == a && d != b) - { - // special case, because sub isn't reversible - MOV(32, R(RSCRATCH), Ra); - MOV(32, Rd, Rb); - SUB(32, Rd, R(RSCRATCH)); - } - else - { - if (d != b) - MOV(32, Rd, Rb); - SUB(32, Rd, Ra); - } - } - - FinalizeCarryOverflow(inst.OE, true); - if (inst.Rc) - ComputeRC(d); -} - void Jit64::rlwinmx(UGeckoInstruction inst) { INSTRUCTION_START