diff --git a/Source/Core/Common/Src/CommonFuncs.h b/Source/Core/Common/Src/CommonFuncs.h index 5aa174ed98..294f64632d 100644 --- a/Source/Core/Common/Src/CommonFuncs.h +++ b/Source/Core/Common/Src/CommonFuncs.h @@ -121,9 +121,12 @@ inline u16 swap16(u16 _data) {return bswap_16(_data);} inline u32 swap32(u32 _data) {return bswap_32(_data);} inline u64 swap64(u64 _data) {return bswap_64(_data);} #elif __APPLE__ -inline u16 swap16(u16 _data) {return (_data >> 8) | (_data << 8);} -inline u32 swap32(u32 _data) {return __builtin_bswap32(_data);} -inline u64 swap64(u64 _data) {return __builtin_bswap64(_data);} +inline __attribute__((always_inline)) u16 swap16(u16 _data) + {return (_data >> 8) | (_data << 8);} +inline __attribute__((always_inline)) u32 swap32(u32 _data) + {return __builtin_bswap32(_data);} +inline __attribute__((always_inline)) u64 swap64(u64 _data) + {return __builtin_bswap64(_data);} #else // Slow generic implementation. inline u16 swap16(u16 data) {return (data >> 8) | (data << 8);} diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp index 1b8fc73f5d..d27b32f312 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp @@ -33,14 +33,14 @@ extern int colIndex; extern int colElements[2]; -inline void _SetCol(u32 val) +__forceinline void _SetCol(u32 val) { *(u32*)VertexManager::s_pCurBufferPointer = val; VertexManager::s_pCurBufferPointer += 4; colIndex++; } -void _SetCol4444(u16 val) +__forceinline void _SetCol4444(u16 val) { u32 col = Convert4To8(val & 0xF) << ASHIFT; col |= Convert4To8((val >> 12) & 0xF) << RSHIFT; @@ -49,7 +49,7 @@ void _SetCol4444(u16 val) _SetCol(col); } -void _SetCol6666(u32 val) +__forceinline void _SetCol6666(u32 val) { u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT; col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT; @@ -58,7 +58,7 @@ void _SetCol6666(u32 val) _SetCol(col); } -void _SetCol565(u16 val) +__forceinline void _SetCol565(u16 val) { u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT; col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT; @@ -67,12 +67,12 @@ void _SetCol565(u16 val) } -inline u32 _Read24(const u8 *addr) +__forceinline u32 _Read24(const u8 *addr) { return addr[0] | (addr[1] << 8) | (addr[2] << 16) | 0xFF000000; } -inline u32 _Read32(const u8 *addr) +__forceinline u32 _Read32(const u8 *addr) { return *(const u32 *)addr; } diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index 42c689ab9a..594bdcb5a0 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -92,17 +92,6 @@ void Pos_ReadDirect() VertexManager::s_pCurBufferPointer += 12; } -// Explicitly instantiate these functions to decrease the possibility of -// symbol binding problems when (only) calling them from JIT compiled code. -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); - void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect(); } void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect(); } void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect(); } @@ -162,7 +151,7 @@ inline void Pos_ReadIndex_Short(int Index) } template -inline void Pos_ReadIndex_Float(int Index) +void Pos_ReadIndex_Float(int Index) { const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); @@ -180,7 +169,7 @@ static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x0 static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); template -inline void Pos_ReadIndex_Float_SSSE3(int Index) +void Pos_ReadIndex_Float_SSSE3(int Index) { const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); const __m128i a = _mm_loadu_si128((__m128i*)pData); @@ -191,6 +180,27 @@ inline void Pos_ReadIndex_Float_SSSE3(int Index) } #endif +// Explicitly instantiate these functions to decrease the possibility of +// symbol binding problems when (only) calling them from JIT compiled code. +template void Pos_ReadDirect(); +template void Pos_ReadDirect(); +template void Pos_ReadDirect(); +template void Pos_ReadDirect(); +template void Pos_ReadDirect(); +template void Pos_ReadDirect(); +template void Pos_ReadDirect(); +template void Pos_ReadDirect(); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Float(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Float(int Index); + // ============================================================================== // Index 8 // ==============================================================================