Jit_LoadStore: Another minor dcbx optimization

The multiplication needs the value from RSCRATCH2, but shouldn't
overwrite it, as that value is still needed later. The original code
solved this by first copying RSCRATCH2 into RSCRATCH and multiplying there.

As it turns out, the other register involved in the multiplication
(reg_cycle_count) can safely be overwritten, so we can swap the operands
around, use RSCRATCH2 directly as the source, and drop the extra MOV.

Before:
33 D2                xor         edx,edx
8B 45 64             mov         eax,dword ptr [rbp+64h]
85 C0                test        eax,eax
7E 30                jle         000002D4DF373F6B
44 8B B5 D4 02 00 00 mov         r14d,dword ptr [rbp+2D4h]
44 8B E8             mov         r13d,eax
BF 07 00 00 00       mov         edi,7
F7 F7                div         eax,edi
41 8D 56 FF          lea         edx,[r14-1]
3B C2                cmp         eax,edx
0F 42 D0             cmovb       edx,eax
44 2B F2             sub         r14d,edx
44 89 B5 D4 02 00 00 mov         dword ptr [rbp+2D4h],r14d
8B C2                mov         eax,edx
0F AF C7             imul        eax,edi
44 2B E8             sub         r13d,eax
44 89 6D 64          mov         dword ptr [rbp+64h],r13d
44 8D 72 01          lea         r14d,[rdx+1]

After:
33 D2                xor         edx,edx
8B 45 64             mov         eax,dword ptr [rbp+64h]
85 C0                test        eax,eax
7E 2E                jle         0000021C01013F69
44 8B B5 D4 02 00 00 mov         r14d,dword ptr [rbp+2D4h]
44 8B E8             mov         r13d,eax
BF 07 00 00 00       mov         edi,7
F7 F7                div         eax,edi
41 8D 56 FF          lea         edx,[r14-1]
3B C2                cmp         eax,edx
0F 42 D0             cmovb       edx,eax
44 2B F2             sub         r14d,edx
44 89 B5 D4 02 00 00 mov         dword ptr [rbp+2D4h],r14d
0F AF FA             imul        edi,edx
44 2B EF             sub         r13d,edi
44 89 6D 64          mov         dword ptr [rbp+64h],r13d
44 8D 72 01          lea         r14d,[rdx+1]
This commit is contained in:
Sintendo 2023-10-24 00:42:30 +02:00
parent dd58a8d65e
commit 171f76ae07

View File

@@ -291,10 +291,9 @@ void Jit64::dcbx(UGeckoInstruction inst)
 // registers.
 SUB(32, R(loop_counter), R(RSCRATCH2));
 MOV(32, PPCSTATE_CTR, R(loop_counter)); // CTR -= RSCRATCH2
-MOV(32, R(RSCRATCH), R(RSCRATCH2));
-IMUL(32, RSCRATCH, R(reg_cycle_count));
+IMUL(32, reg_cycle_count, R(RSCRATCH2));
 // ^ Note that this cannot overflow because it's limited by (downcount/cycle_count).
-SUB(32, R(reg_downcount), R(RSCRATCH));
+SUB(32, R(reg_downcount), R(reg_cycle_count));
 MOV(32, PPCSTATE(downcount), R(reg_downcount)); // downcount -= (RSCRATCH2 * reg_cycle_count)
 SetJumpTarget(downcount_is_zero_or_negative);