VideoCommon: merged the SSSE3/SSE4.1 code paths. Added some additional SSSE3/SSE4.1 code paths that are used by "The Legend of Zelda: Twilight Princess".

These code paths are only compiled in when "_M_SSE=0x301" (for SSSE3) or "_M_SSE=0x401" (for SSE4.1) is set as a preprocessor definition.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5300 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
nodchip
2010-04-09 03:02:12 +00:00
parent 73caf37bca
commit 6136c94de5
4 changed files with 171 additions and 5 deletions

View File

@ -21,6 +21,10 @@
#include "VertexLoader_Position.h"
#include "NativeVertexWriter.h"
// SSSE3 intrinsics (_mm_shuffle_epi8) used by the byte-swapping position loaders.
// _M_SSE is a hexadecimal level code (0x301 = SSSE3, 0x401 = SSE4.1), so the
// threshold must be written in hex to match the guards around the SSSE3 code
// itself; a decimal 301 would also admit lower levels such as 0x300.
#if _M_SSE >= 0x301
#include <tmmintrin.h>
#endif
extern float posScale;
extern TVtxAttr *pVtxAttr;
@ -146,16 +150,33 @@ inline void Pos_ReadIndex_Short(int Index)
VertexManager::s_pCurBufferPointer += 12;
}
#if _M_SSE >= 0x301
// Control masks for _mm_shuffle_epi8. Each mask byte names the source byte to
// copy into that position; a mask byte with its top bit set (0xFF) yields zero.
// _mm_set_epi32 takes its arguments from the highest 32-bit lane to the lowest.

// Reverses the byte order of lanes 0, 1 and 2; lane 3 is zeroed.
static const __m128i kMaskSwap32_3 = _mm_set_epi32(
    (int)0xFFFFFFFF,  // lane 3: zero
    0x08090A0B,       // lane 2: bytes 11,10,9,8
    0x04050607,       // lane 1: bytes 7,6,5,4
    0x00010203);      // lane 0: bytes 3,2,1,0

// Reverses the byte order of lanes 0 and 1; lanes 2 and 3 are zeroed.
static const __m128i kMaskSwap32_2 = _mm_set_epi32(
    (int)0xFFFFFFFF,  // lane 3: zero
    (int)0xFFFFFFFF,  // lane 2: zero
    0x04050607,       // lane 1: bytes 7,6,5,4
    0x00010203);      // lane 0: bytes 3,2,1,0
#endif
template<bool three>
inline void Pos_ReadIndex_Float(int Index)
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
#if _M_SSE >= 0x301
const __m128i a = _mm_loadu_si128((__m128i*)pData);
__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2);
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
#else
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
if (three)
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
#endif
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}