Merge pull request #9714 from JosJuice/jitarm64-convert-fmov

JitArm64: Prefer using FMOV when doing single/double conversion
This commit is contained in:
Mai M 2021-05-20 10:24:36 -04:00 committed by GitHub
commit 5949a19fe6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 17 deletions

View File

@@ -509,9 +509,9 @@ void JitArm64::ConvertDoubleToSingleLower(size_t guest_reg, ARM64Reg dest_reg, A
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30}; const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
ABI_PushRegisters(gpr_saved); ABI_PushRegisters(gpr_saved);
m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0); m_float_emit.FMOV(ARM64Reg::X0, EncodeRegToDouble(src_reg));
BL(cdts); BL(cdts);
m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1); m_float_emit.FMOV(EncodeRegToSingle(dest_reg), ARM64Reg::W1);
ABI_PopRegisters(gpr_saved); ABI_PopRegisters(gpr_saved);
} }
@@ -529,11 +529,10 @@ void JitArm64::ConvertDoubleToSinglePair(size_t guest_reg, ARM64Reg dest_reg, AR
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30}; const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
ABI_PushRegisters(gpr_saved); ABI_PushRegisters(gpr_saved);
m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0); m_float_emit.FMOV(ARM64Reg::X0, EncodeRegToDouble(src_reg));
BL(cdts); BL(cdts);
m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1);
m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 1); m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 1);
m_float_emit.FMOV(EncodeRegToSingle(dest_reg), ARM64Reg::W1);
BL(cdts); BL(cdts);
m_float_emit.INS(32, dest_reg, 1, ARM64Reg::W1); m_float_emit.INS(32, dest_reg, 1, ARM64Reg::W1);
@@ -579,9 +578,9 @@ void JitArm64::ConvertSingleToDoubleLower(size_t guest_reg, ARM64Reg dest_reg, A
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 4, 30}; const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 4, 30};
ABI_PushRegisters(gpr_saved); ABI_PushRegisters(gpr_saved);
m_float_emit.UMOV(32, ARM64Reg::W0, src_reg, 0); m_float_emit.FMOV(ARM64Reg::W0, EncodeRegToSingle(src_reg));
BL(cstd); BL(cstd);
m_float_emit.INS(64, dest_reg, 0, ARM64Reg::X0); m_float_emit.FMOV(EncodeRegToDouble(dest_reg), ARM64Reg::X1);
ABI_PopRegisters(gpr_saved); ABI_PopRegisters(gpr_saved);
@@ -650,17 +649,15 @@ void JitArm64::ConvertSingleToDoublePair(size_t guest_reg, ARM64Reg dest_reg, AR
// If no (or if we don't have a scratch register), call the bit-exact routine // If no (or if we don't have a scratch register), call the bit-exact routine
// Save X0-X4 and X30 if they're in use
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 4, 30}; const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 4, 30};
ABI_PushRegisters(gpr_saved); ABI_PushRegisters(gpr_saved);
m_float_emit.FMOV(ARM64Reg::W0, EncodeRegToSingle(src_reg));
BL(cstd);
m_float_emit.UMOV(32, ARM64Reg::W0, src_reg, 1); m_float_emit.UMOV(32, ARM64Reg::W0, src_reg, 1);
m_float_emit.FMOV(EncodeRegToDouble(dest_reg), ARM64Reg::X1);
BL(cstd); BL(cstd);
m_float_emit.INS(64, dest_reg, 1, ARM64Reg::X0); m_float_emit.INS(64, dest_reg, 1, ARM64Reg::X1);
m_float_emit.UMOV(32, ARM64Reg::W0, src_reg, 0);
BL(cstd);
m_float_emit.INS(64, dest_reg, 0, ARM64Reg::X0);
ABI_PopRegisters(gpr_saved); ABI_PopRegisters(gpr_saved);

View File

@@ -369,7 +369,7 @@ void JitArm64::GenerateConvertDoubleToSingle()
RET(); RET();
} }
// Input in W0, output in X0, clobbers X0-X4 and flags. // Input in W0, output in X1, clobbers X0-X4 and flags.
void JitArm64::GenerateConvertSingleToDouble() void JitArm64::GenerateConvertSingleToDouble()
{ {
UBFX(ARM64Reg::W1, ARM64Reg::W0, 23, 8); UBFX(ARM64Reg::W1, ARM64Reg::W0, 23, 8);
@@ -379,7 +379,7 @@ void JitArm64::GenerateConvertSingleToDouble()
FixupBranch denormal = CBNZ(ARM64Reg::W1); FixupBranch denormal = CBNZ(ARM64Reg::W1);
// Zero // Zero
LSL(ARM64Reg::X0, ARM64Reg::X0, 32); LSL(ARM64Reg::X1, ARM64Reg::X0, 32);
RET(); RET();
SetJumpTarget(denormal); SetJumpTarget(denormal);
@@ -392,7 +392,7 @@ void JitArm64::GenerateConvertSingleToDouble()
LSLV(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::X3); LSLV(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::X3);
BFI(ARM64Reg::X2, ARM64Reg::X1, 30, 22); BFI(ARM64Reg::X2, ARM64Reg::X1, 30, 22);
MOVI2R(ARM64Reg::X1, 0x3a90000000000000); MOVI2R(ARM64Reg::X1, 0x3a90000000000000);
ADD(ARM64Reg::X0, ARM64Reg::X2, ARM64Reg::X1); ADD(ARM64Reg::X1, ARM64Reg::X2, ARM64Reg::X1);
RET(); RET();
SetJumpTarget(normal_or_nan); SetJumpTarget(normal_or_nan);
@@ -407,7 +407,7 @@ void JitArm64::GenerateConvertSingleToDouble()
CMP(ARM64Reg::W2, 0); CMP(ARM64Reg::W2, 0);
CSEL(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::ZR, CCFlags::CC_NEQ); CSEL(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::ZR, CCFlags::CC_NEQ);
BFI(ARM64Reg::X3, ARM64Reg::X4, 29, 30); BFI(ARM64Reg::X3, ARM64Reg::X4, 29, 30);
ORR(ARM64Reg::X0, ARM64Reg::X3, ARM64Reg::X1); ORR(ARM64Reg::X1, ARM64Reg::X3, ARM64Reg::X1);
RET(); RET();
} }