diff --git a/Source/Core/VideoCommon/Src/DataReader.h b/Source/Core/VideoCommon/Src/DataReader.h index fb9a6bbe60..7feb066645 100644 --- a/Source/Core/VideoCommon/Src/DataReader.h +++ b/Source/Core/VideoCommon/Src/DataReader.h @@ -90,17 +90,23 @@ void DataReadU32xN_SSSE3(u32 *bufx16) case 13: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); case 9: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); case 5: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); - case 1: _mm_storeu_si128(store, _mm_shuffle_epi8(_mm_loadu_si128(load), mask1)); + case 1: // 1 U32 left: + ((u32 *)store)[0] = Common::swap32(((u32 *)load)[0]); break; case 14: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); case 10: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); case 6: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); - case 2: _mm_storeu_si128(store, _mm_shuffle_epi8(_mm_loadu_si128(load), mask2)); + case 2: // 2 U32s left: + ((u32 *)store)[0] = Common::swap32(((u32 *)load)[0]); + ((u32 *)store)[1] = Common::swap32(((u32 *)load)[1]); break; case 15: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); case 11: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); case 7: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); - case 3: _mm_storeu_si128(store, _mm_shuffle_epi8(_mm_loadu_si128(load), mask3)); + case 3: // 3 U32s left: + ((u32 *)store)[0] = Common::swap32(((u32 *)load)[0]); + ((u32 *)store)[1] = Common::swap32(((u32 *)load)[1]); + ((u32 *)store)[2] = Common::swap32(((u32 *)load)[2]); break; case 16: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4)); case 12: _mm_storeu_si128(store++, _mm_shuffle_epi8(_mm_loadu_si128(load++), mask4));