Merge pull request #11956 from JosJuice/jitarm64-12-bit-asm

JitArm64: Micro-optimizations in fres routine
This commit is contained in:
Admiral H. Curtiss 2023-06-17 04:00:22 +02:00 committed by GitHub
commit 089d433996
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -287,19 +287,18 @@ void JitArm64::GenerateFres()
CMP(ARM64Reg::X2, 895); CMP(ARM64Reg::X2, 895);
FixupBranch small_exponent = B(CCFlags::CC_LO); FixupBranch small_exponent = B(CCFlags::CC_LO);
MOVI2R(ARM64Reg::X4, 1148LL); CMP(ARM64Reg::X2, 1148);
CMP(ARM64Reg::X2, ARM64Reg::X4);
FixupBranch large_exponent = B(CCFlags::CC_HI); FixupBranch large_exponent = B(CCFlags::CC_HI);
UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5); // Grab upper part of mantissa UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5); // Grab upper part of mantissa
MOVP2R(ARM64Reg::X3, &Common::fres_expected); MOVP2R(ARM64Reg::X3, &Common::fres_expected);
ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3)); ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa
LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
MOVI2R(ARM64Reg::W4, 1); MOVI2R(ARM64Reg::W4, 1);
AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, 64));
MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4); MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4);
SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1)); SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1));
AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, 64));
ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29)); ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
RET(); RET();
@ -320,8 +319,7 @@ void JitArm64::GenerateFres()
RET(); RET();
SetJumpTarget(large_exponent); SetJumpTarget(large_exponent);
MOVI2R(ARM64Reg::X4, 0x7FF); CMP(ARM64Reg::X2, 0x7FF);
CMP(ARM64Reg::X2, ARM64Reg::X4);
CSEL(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X3, CCFlags::CC_EQ); CSEL(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X3, CCFlags::CC_EQ);
RET(); RET();
} }