From 72fbdf1a6bafe8a01c58deae79fbae44535a34f4 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sun, 19 Apr 2020 22:38:58 +0200 Subject: [PATCH 1/7] Jit64: addx - Deduplicate branches part 1 No functional change, just simplify some repeated logic for the cases where the destination register matches one of the sources. --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 494969b71f..445f536d11 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1330,13 +1330,10 @@ void Jit64::addx(UGeckoInstruction inst) RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rb, Rd); - if (d == a) + if ((d == a) || (d == b)) { - ADD(32, Rd, Rb); - } - else if (d == b) - { - ADD(32, Rd, Ra); + RCOpArg& Rnotd = (d == a) ? Rb : Ra; + ADD(32, Rd, Rnotd); } else if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !inst.OE) { From f1c3ab359d8113fb03bb682dc0f0171b17c137fe Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sun, 19 Apr 2020 22:55:43 +0200 Subject: [PATCH 2/7] Jit64: addx - Deduplicate branches part 2 No functional change, just simplify some repeated logic in the case where we're dealing with exactly one immediate and one simple register when overflow isn't needed. 
--- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 445f536d11..62cfb48594 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1339,13 +1339,11 @@ void Jit64::addx(UGeckoInstruction inst) { LEA(32, Rd, MRegSum(Ra.GetSimpleReg(), Rb.GetSimpleReg())); } - else if (Ra.IsSimpleReg() && Rb.IsImm() && !inst.OE) + else if ((Ra.IsSimpleReg() || Rb.IsSimpleReg()) && (Ra.IsImm() || Rb.IsImm()) && !inst.OE) { - LEA(32, Rd, MDisp(Ra.GetSimpleReg(), Rb.SImm32())); - } - else if (Rb.IsSimpleReg() && Ra.IsImm() && !inst.OE) - { - LEA(32, Rd, MDisp(Rb.GetSimpleReg(), Ra.SImm32())); + RCOpArg& Rimm = Ra.IsImm() ? Ra : Rb; + RCOpArg& Rreg = Ra.IsImm() ? Rb : Ra; + LEA(32, Rd, MDisp(Rreg.GetSimpleReg(), Rimm.SImm32())); } else { From 1c25e6352a742542506100e93b370d8185b7a821 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sun, 19 Apr 2020 23:02:13 +0200 Subject: [PATCH 3/7] Jit64: addx - Emit nothing when possible When the destination register matches a source register, the other source register contains zero, and overflow isn't needed, the instruction becomes a nop and we don't need to emit anything. We could add specialized handling for the case where overflow is needed, but none of the titles I tried would hit this path. Before: 83 C7 00 add edi,0 After: --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 62cfb48594..39452e2512 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1333,7 +1333,10 @@ void Jit64::addx(UGeckoInstruction inst) if ((d == a) || (d == b)) { RCOpArg& Rnotd = (d == a) ? 
Rb : Ra; - ADD(32, Rd, Rnotd); + if (!Rnotd.IsZero() || inst.OE) + { + ADD(32, Rd, Rnotd); + } } else if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !inst.OE) { From 24816605192412c0eb63f24b8d0b8ba97017846d Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sun, 19 Apr 2020 23:13:02 +0200 Subject: [PATCH 4/7] Jit64: addx - Emit MOV when possible When the source registers are a simple register and a constant zero and overflow isn't needed, emitting LEA is kinda silly. This will occasionally save a single byte for certain registers due to how x86 encoding works. More importantly, LEA takes up execution resources while MOV does not. Before: 41 8D 7D 00 lea edi,[r13] After: 41 8B FD mov edi,r13d --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 39452e2512..a34d5bf356 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1346,7 +1346,15 @@ void Jit64::addx(UGeckoInstruction inst) { RCOpArg& Rimm = Ra.IsImm() ? Ra : Rb; RCOpArg& Rreg = Ra.IsImm() ? Rb : Ra; - LEA(32, Rd, MDisp(Rreg.GetSimpleReg(), Rimm.SImm32())); + + if (Rimm.IsZero()) + { + MOV(32, Rd, Rreg); + } + else + { + LEA(32, Rd, MDisp(Rreg.GetSimpleReg(), Rimm.SImm32())); + } } else { From 50f7a7d2489944dbe79884c343fe11bcacdd3363 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sun, 19 Apr 2020 23:25:09 +0200 Subject: [PATCH 5/7] Jit64: addx - Prefer smaller MOV+ADD sequence ADD has a smaller encoding for immediates that can be expressed as an 8-bit signed integer (in other words, between -128 and 127). MOV lacks this compact representation. Since addition allows us to swap the source registers, we can always get the shortest sequence here by carefully checking if we're dealing with a small immediate first. 
If we are, move the other source into the destination and add the small immediate onto that. For large immediates the reverse is preferable. Before: 41 BE 40 00 00 00 mov r14d,40h 44 03 75 A8 add r14d,dword ptr [rbp-58h] After: 44 8B 75 A8 mov r14d,dword ptr [rbp-58h] 41 83 C6 40 add r14d,40h Before: 44 8B 7D F8 mov r15d,dword ptr [rbp-8] 41 81 C7 00 68 00 CC add r15d,0CC006800h After: 41 BF 00 68 00 CC mov r15d,0CC006800h 44 03 7D F8 add r15d,dword ptr [rbp-8] --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index a34d5bf356..5eab8e7de5 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1356,6 +1356,23 @@ void Jit64::addx(UGeckoInstruction inst) LEA(32, Rd, MDisp(Rreg.GetSimpleReg(), Rimm.SImm32())); } } + else if (Ra.IsImm() || Rb.IsImm()) + { + RCOpArg& Rimm = Ra.IsImm() ? Ra : Rb; + RCOpArg& Rother = Ra.IsImm() ? Rb : Ra; + + s32 imm = Rimm.SImm32(); + if (imm >= -128 && imm <= 127) + { + MOV(32, Rd, Rother); + ADD(32, Rd, Rimm); + } + else + { + MOV(32, Rd, Rimm); + ADD(32, Rd, Rother); + } + } else { MOV(32, Rd, Ra); From 89646c898fc0507135cb56acda7a4776ccca17d1 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sun, 19 Apr 2020 23:47:47 +0200 Subject: [PATCH 6/7] Jit64: addx - Skip ADD after MOV when possible We can get away with skipping the addition when we know we're dealing with a constant zero. Just a MOV will suffice in this case. Once again, we don't bother to add separate handling for when overflow is needed, because no titles would ever hit that path during my testing. 
Before: 8B 7D F8 mov edi,dword ptr [rbp-8] 83 C7 00 add edi,0 After: 8B 7D F8 mov edi,dword ptr [rbp-8] --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 5eab8e7de5..8b5c7d95ad 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1365,7 +1365,10 @@ void Jit64::addx(UGeckoInstruction inst) if (imm >= -128 && imm <= 127) { MOV(32, Rd, Rother); - ADD(32, Rd, Rimm); + if (imm != 0 || inst.OE) + { + ADD(32, Rd, Rimm); + } } else { From 19dda51a0d34d9b26d8ef38139a81079b21633b4 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sun, 19 Apr 2020 23:53:56 +0200 Subject: [PATCH 7/7] Jit64: subfx - Use LEA when possible Similar to what we do for addx. Since we're calculating b - a and because subtraction is not commutative, we can only apply this when source register a holds the constant. Before: 45 8B EE mov r13d,r14d 41 83 ED 08 sub r13d,8 After: 45 8D 6E F8 lea r13d,[r14-8] --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 8b5c7d95ad..99b66d83fe 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -932,6 +932,10 @@ void Jit64::subfx(UGeckoInstruction inst) MOV(32, Rd, Rb); SUB(32, Rd, R(RSCRATCH)); } + else if (Rb.IsSimpleReg() && Ra.IsImm() && !inst.OE) + { + LEA(32, Rd, MDisp(Rb.GetSimpleReg(), -Ra.SImm32())); + } else { MOV(32, Rd, Rb);