mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-21 05:09:34 -06:00
TextureDecoder.cpp: new SSE2 optimized GX_TF_I8 decoder. Probably not ultimately optimal SSE2 code, but provably better (on my machine) than the memset version. Tested with __rdtsc counts in an independent project. I get about 6-7 FPS more on average during the intro movie playback in Mario Kart Wii. Hope this compiles for GCC okay.
TextureDecoder.cpp: merged the two functionally identical decode5A3RGBA and decode5A3rgba methods. OpcodeDecoding.cpp and DLCache.cpp: optimization for GX_LOAD_XF_REG. The PSHUFB solution sounds better for SSSE3, but this is a small win for the default case. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6692 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
@ -64,6 +64,32 @@ __forceinline u32 DataReadU32()
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template<unsigned int N>
|
||||
void DataReadU32xN(u32 *bufx16)
|
||||
{
|
||||
memcpy(bufx16, g_pVideoData, sizeof(u32) * N);
|
||||
if (N >= 1) bufx16[0] = Common::swap32(bufx16[0]);
|
||||
if (N >= 2) bufx16[1] = Common::swap32(bufx16[1]);
|
||||
if (N >= 3) bufx16[2] = Common::swap32(bufx16[2]);
|
||||
if (N >= 4) bufx16[3] = Common::swap32(bufx16[3]);
|
||||
if (N >= 5) bufx16[4] = Common::swap32(bufx16[4]);
|
||||
if (N >= 6) bufx16[5] = Common::swap32(bufx16[5]);
|
||||
if (N >= 7) bufx16[6] = Common::swap32(bufx16[6]);
|
||||
if (N >= 8) bufx16[7] = Common::swap32(bufx16[7]);
|
||||
if (N >= 9) bufx16[8] = Common::swap32(bufx16[8]);
|
||||
if (N >= 10) bufx16[9] = Common::swap32(bufx16[9]);
|
||||
if (N >= 11) bufx16[10] = Common::swap32(bufx16[10]);
|
||||
if (N >= 12) bufx16[11] = Common::swap32(bufx16[11]);
|
||||
if (N >= 13) bufx16[12] = Common::swap32(bufx16[12]);
|
||||
if (N >= 14) bufx16[13] = Common::swap32(bufx16[13]);
|
||||
if (N >= 15) bufx16[14] = Common::swap32(bufx16[14]);
|
||||
if (N >= 16) bufx16[15] = Common::swap32(bufx16[15]);
|
||||
g_pVideoData += (sizeof(u32) * N);
|
||||
}
|
||||
|
||||
// Pointer to a function that takes a u32 buffer and returns nothing; this is
// the signature shared by every DataReadU32xN<N> instantiation.
typedef void (*DataReadU32xNfunc)(u32 *buf);
|
||||
// Table of 16 DataReadU32xNfunc pointers; only declared here, the definition
// lives elsewhere.
// NOTE(review): presumably holds one DataReadU32xN<N> instantiation per word
// count -- confirm the index-to-N mapping against the definition.
extern DataReadU32xNfunc DataReadU32xFuncs[16];
|
||||
|
||||
__forceinline u32 DataReadU32Unswapped()
|
||||
{
|
||||
u32 tmp = *(u32*)g_pVideoData;
|
||||
|
Reference in New Issue
Block a user