VertexLoader: remove non-JIT SSE code

This commit is contained in:
Tillmann Karras
2015-01-19 18:33:08 +01:00
parent 38cb9bbfae
commit 873902b4a3
6 changed files with 8 additions and 349 deletions

View File

@ -6,8 +6,6 @@
#include <type_traits>
#include "Common/CommonTypes.h"
#include "Common/CPUDetect.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexManagerBase.h"
@ -102,63 +100,6 @@ struct Normal_Index_Indices3
static const int size = sizeof(I) * 3;
};
#if _M_SSE >= 0x301
template <typename T, int N>
struct Normal_Direct_SSSE3
{
static void LOADERDECL function(VertexLoader* loader)
{
const T* pData = reinterpret_cast<const T*>(DataGetPosition());
const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1));
const __m128 scale = _mm_set_ps1(frac);
for (int i = 0; i < N; i++, pData += 3)
Vertex_Read_SSSE3<T, true, true>(pData, scale);
DataSkip<N * 3 * sizeof(T)>();
}
static const int size = sizeof(T) * N * 3;
};
template <typename I, typename T, int N, int Offset>
__forceinline void Normal_Index_Offset_SSSE3()
{
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
auto const index = DataRead<I>();
const T* pData = (const T*)(cached_arraybases[ARRAY_NORMAL]
+ (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1));
const __m128 scale = _mm_set_ps1(frac);
for (int i = 0; i < N; i++, pData += 3)
Vertex_Read_SSSE3<T, true, true>(pData, scale);
}
template <typename I, typename T, int N>
struct Normal_Index_SSSE3
{
static void LOADERDECL function(VertexLoader* loader)
{
Normal_Index_Offset_SSSE3<I, T, N, 0>();
}
static const int size = sizeof(I);
};
template <typename I, typename T>
struct Normal_Index_Indices3_SSSE3
{
static void LOADERDECL function(VertexLoader* loader)
{
Normal_Index_Offset_SSSE3<I, T, 1, 0>();
Normal_Index_Offset_SSSE3<I, T, 1, 1>();
Normal_Index_Offset_SSSE3<I, T, 1, 2>();
}
static const int size = sizeof(I) * 3;
};
#endif
}
void VertexLoader_Normal::Init()
@ -231,77 +172,6 @@ void VertexLoader_Normal::Init()
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
#if _M_SSE >= 0x301
if (cpu_info.bSSSE3)
{
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 1>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 1>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 3>();
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u8, u8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u8, s8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u8, u16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u8, s16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u8, float, 1>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_SSSE3<u8, u8, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index_SSSE3<u8, s8, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index_SSSE3<u8, u16, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index_SSSE3<u8, s16, 3>();
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_SSSE3<u8, float, 3>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u8, u8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u8, s8, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u8, u16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u8, s16, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u8, float, 1>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3_SSSE3<u8, u8>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3_SSSE3<u8, s8>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3_SSSE3<u8, u16>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3_SSSE3<u8, s16>();
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3_SSSE3<u8, float>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_SSSE3<u16, u8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index_SSSE3<u16, s8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index_SSSE3<u16, u16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index_SSSE3<u16, s16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_SSSE3<u16, float, 3>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3_SSSE3<u16, u8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3_SSSE3<u16, s8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3_SSSE3<u16, u16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3_SSSE3<u16, s16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3_SSSE3<u16, float>();
}
#endif
}
unsigned int VertexLoader_Normal::GetSize(u64 _type,