mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-30 09:39:46 -06:00
JitArm64: Reimplement Force25BitPrecision
The previous implementation of Force25BitPrecision was essentially a translation of the x86-64 implementation. It worked, but we can make a more efficient implementation by using an AArch64 instruction I don't believe x86-64 has an equivalent of: URSHR. The latency is the same as before, but the instruction count and register count are both reduced.
This commit is contained in:
@ -1229,9 +1229,15 @@ public:
|
||||
void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
|
||||
// Shift by immediate
|
||||
// Scalar shift by immediate
|
||||
void SHL(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void URSHR(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
|
||||
// Vector shift by immediate
|
||||
void SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void URSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
|
Reference in New Issue
Block a user