From 171f76ae07c7895cd91ca4a105572218601b98a5 Mon Sep 17 00:00:00 2001
From: Sintendo
Date: Tue, 24 Oct 2023 00:42:30 +0200
Subject: [PATCH] Jit_LoadStore: Another minor dcbx optimization

The multiplication needs the value from RSCRATCH2, but shouldn't
overwrite it as it is still needed later. The original code solved this
by copying RSCRATCH2 to another register first.

As it turns out, the other register involved in the multiplication can
safely be overwritten, so we can swap the operands around and use
RSCRATCH2 directly without making a copy.

Before:
33 D2                xor         edx,edx
8B 45 64             mov         eax,dword ptr [rbp+64h]
85 C0                test        eax,eax
7E 30                jle         000002D4DF373F6B
44 8B B5 D4 02 00 00 mov         r14d,dword ptr [rbp+2D4h]
44 8B E8             mov         r13d,eax
BF 07 00 00 00       mov         edi,7
F7 F7                div         eax,edi
41 8D 56 FF          lea         edx,[r14-1]
3B C2                cmp         eax,edx
0F 42 D0             cmovb       edx,eax
44 2B F2             sub         r14d,edx
44 89 B5 D4 02 00 00 mov         dword ptr [rbp+2D4h],r14d
8B C2                mov         eax,edx
0F AF C7             imul        eax,edi
44 2B E8             sub         r13d,eax
44 89 6D 64          mov         dword ptr [rbp+64h],r13d
44 8D 72 01          lea         r14d,[rdx+1]

After:
33 D2                xor         edx,edx
8B 45 64             mov         eax,dword ptr [rbp+64h]
85 C0                test        eax,eax
7E 2E                jle         0000021C01013F69
44 8B B5 D4 02 00 00 mov         r14d,dword ptr [rbp+2D4h]
44 8B E8             mov         r13d,eax
BF 07 00 00 00       mov         edi,7
F7 F7                div         eax,edi
41 8D 56 FF          lea         edx,[r14-1]
3B C2                cmp         eax,edx
0F 42 D0             cmovb       edx,eax
44 2B F2             sub         r14d,edx
44 89 B5 D4 02 00 00 mov         dword ptr [rbp+2D4h],r14d
0F AF FA             imul        edi,edx
44 2B EF             sub         r13d,edi
44 89 6D 64          mov         dword ptr [rbp+64h],r13d
44 8D 72 01          lea         r14d,[rdx+1]
---
 Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 9d385d10c0..7a7461713e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -291,10 +291,9 @@ void Jit64::dcbx(UGeckoInstruction inst)
     // registers.
     SUB(32, R(loop_counter), R(RSCRATCH2));
     MOV(32, PPCSTATE_CTR, R(loop_counter)); // CTR -= RSCRATCH2
-    MOV(32, R(RSCRATCH), R(RSCRATCH2));
-    IMUL(32, RSCRATCH, R(reg_cycle_count));
+    IMUL(32, reg_cycle_count, R(RSCRATCH2));
     // ^ Note that this cannot overflow because it's limited by (downcount/cycle_count).
-    SUB(32, R(reg_downcount), R(RSCRATCH));
+    SUB(32, R(reg_downcount), R(reg_cycle_count));
     MOV(32, PPCSTATE(downcount), R(reg_downcount)); // downcount -= (RSCRATCH2 * reg_cycle_count)
 
     SetJumpTarget(downcount_is_zero_or_negative);