mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-21 05:09:34 -06:00
more work on dlist caching now should be a little faster
a little bugfix in vertex loading and some fixes. not much time to work these days but at least i can spend a little time fixing thing. please test for regressions. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6409 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
@ -122,61 +122,73 @@ void LOADERDECL Pos_ReadDirect_Float2()
|
||||
}
|
||||
|
||||
|
||||
template<class T, bool three>
|
||||
template<class T, bool three,int MaxSize>
|
||||
inline void Pos_ReadIndex_Byte(int Index)
|
||||
{
|
||||
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
|
||||
if (three)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
if(Index < MaxSize)
|
||||
{
|
||||
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
|
||||
if (three)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, bool three>
|
||||
template<class T, bool three,int MaxSize>
|
||||
inline void Pos_ReadIndex_Short(int Index)
|
||||
{
|
||||
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
|
||||
if (three)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
if(Index < MaxSize)
|
||||
{
|
||||
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
|
||||
if (three)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
|
||||
template<bool three>
|
||||
template<bool three,int MaxSize>
|
||||
void Pos_ReadIndex_Float(int Index)
|
||||
{
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
|
||||
if (three)
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
if(Index < MaxSize)
|
||||
{
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
|
||||
if (three)
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x301
|
||||
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
|
||||
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
|
||||
|
||||
template<bool three>
|
||||
template<bool three,int MaxSize>
|
||||
void Pos_ReadIndex_Float_SSSE3(int Index)
|
||||
{
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||
const __m128i a = _mm_loadu_si128((__m128i*)pData);
|
||||
__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2);
|
||||
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
if(Index < MaxSize)
|
||||
{
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||
const __m128i a = _mm_loadu_si128((__m128i*)pData);
|
||||
__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2);
|
||||
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -190,50 +202,60 @@ template void Pos_ReadDirect<u8, false>();
|
||||
template void Pos_ReadDirect<s8, false>();
|
||||
template void Pos_ReadDirect<u16, false>();
|
||||
template void Pos_ReadDirect<s16, false>();
|
||||
template void Pos_ReadIndex_Byte<u8, true>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, true>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, true>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, true>(int Index);
|
||||
template void Pos_ReadIndex_Float<true>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, false>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, false>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, false>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, false>(int Index);
|
||||
template void Pos_ReadIndex_Float<false>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Float<true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Float<false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Float<true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Float<false, 65535>(int Index);
|
||||
|
||||
// ==============================================================================
|
||||
// Index 8
|
||||
// ==============================================================================
|
||||
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false, 255> (DataReadU8());}
|
||||
|
||||
// ==============================================================================
|
||||
// Index 16
|
||||
// ==============================================================================
|
||||
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false, 65535> (DataReadU16());}
|
||||
|
||||
#if _M_SSE >= 0x301
|
||||
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 65535> (DataReadU16());}
|
||||
#endif
|
||||
|
||||
static TPipelineFunction tableReadPosition[4][8][2] = {
|
||||
|
Reference in New Issue
Block a user