JitArm64: Optimize ps_mergeXX

1. In some cases, ps_merge01 can be implemented using one instruction.
2. When we need two instructions for ps_merge01, it's best to start with
   a MOV to avoid false dependencies on the destination register.
3. ps_merge10 can be implemented using a single EXT instruction.
This commit is contained in:
JosJuice
2022-11-26 17:57:16 +01:00
parent 0ef6d30a0d
commit f45d3a6a2c
3 changed files with 35 additions and 18 deletions

View File

@@ -1247,6 +1247,9 @@ public:
void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
// Extract
void EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 index);
// Scalar shift by immediate
void SHL(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void URSHR(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
@@ -1305,6 +1308,7 @@ private:
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitExtract(u32 imm4, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
void EmitShiftImm(bool Q, bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitScalarShiftImm(bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);