mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-21 05:09:34 -06:00
VideoCommon: remove XFReg copy optimization
This code is just ugly and I doubt there is a way that copying twice is faster.
This commit is contained in:
@ -92,46 +92,6 @@ __forceinline u32 DataReadU32()
|
||||
return DataRead<u32>();
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x301
|
||||
const __m128i bs_mask = _mm_set_epi32(0x0C0D0E0FL, 0x08090A0BL, 0x04050607L, 0x00010203L);
|
||||
|
||||
template<unsigned int N>
|
||||
void DataReadU32xN_SSSE3(u32 *bufx16)
|
||||
{
|
||||
memcpy(bufx16, g_pVideoData, sizeof(u32) * N);
|
||||
__m128i* buf = (__m128i *)bufx16;
|
||||
if (N>12) { _mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask)); buf++; }
|
||||
if (N>8) { _mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask)); buf++; }
|
||||
if (N>4) { _mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask)); buf++; }
|
||||
_mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask));
|
||||
g_pVideoData += (sizeof(u32) * N);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<unsigned int N>
|
||||
void DataReadU32xN(u32 *bufx16)
|
||||
{
|
||||
memcpy(bufx16, g_pVideoData, sizeof(u32) * N);
|
||||
if (N >= 1) bufx16[0] = Common::swap32(bufx16[0]);
|
||||
if (N >= 2) bufx16[1] = Common::swap32(bufx16[1]);
|
||||
if (N >= 3) bufx16[2] = Common::swap32(bufx16[2]);
|
||||
if (N >= 4) bufx16[3] = Common::swap32(bufx16[3]);
|
||||
if (N >= 5) bufx16[4] = Common::swap32(bufx16[4]);
|
||||
if (N >= 6) bufx16[5] = Common::swap32(bufx16[5]);
|
||||
if (N >= 7) bufx16[6] = Common::swap32(bufx16[6]);
|
||||
if (N >= 8) bufx16[7] = Common::swap32(bufx16[7]);
|
||||
if (N >= 9) bufx16[8] = Common::swap32(bufx16[8]);
|
||||
if (N >= 10) bufx16[9] = Common::swap32(bufx16[9]);
|
||||
if (N >= 11) bufx16[10] = Common::swap32(bufx16[10]);
|
||||
if (N >= 12) bufx16[11] = Common::swap32(bufx16[11]);
|
||||
if (N >= 13) bufx16[12] = Common::swap32(bufx16[12]);
|
||||
if (N >= 14) bufx16[13] = Common::swap32(bufx16[13]);
|
||||
if (N >= 15) bufx16[14] = Common::swap32(bufx16[14]);
|
||||
if (N >= 16) bufx16[15] = Common::swap32(bufx16[15]);
|
||||
g_pVideoData += (sizeof(u32) * N);
|
||||
}
|
||||
|
||||
__forceinline u32 DataReadU32Unswapped()
|
||||
{
|
||||
u32 tmp = *(u32*)g_pVideoData;
|
||||
|
Reference in New Issue
Block a user