JIT: use cvtsi2ss in paired singles

One less instruction for a few of the loads.
This commit is contained in:
Fiora 2014-09-16 22:50:33 -07:00
parent 2c233c4976
commit 2ae6f13d22

View File

@ -544,8 +544,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG); SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
else else
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0)); // Is CVTSI2SS better?
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
@ -585,8 +584,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG); SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG);
else else
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true); UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true);
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
@ -621,8 +619,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG); SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
else else
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false); UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false);
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
@ -656,8 +653,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG); SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG);
else else
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true); UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true);
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));