JIT: use cvtsi2ss in paired singles

One less instruction for a few of the loads.
This commit is contained in:
Fiora 2014-09-16 22:50:33 -07:00
parent 2c233c4976
commit 2ae6f13d22

View File

@ -544,8 +544,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
else
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0)); // Is CVTSI2SS better?
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1));
@ -585,8 +584,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG);
else
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true);
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0));
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1));
@ -621,8 +619,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
else
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false);
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0));
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1));
@ -656,8 +653,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG);
else
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true);
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0));
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1));