diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index dcfaf21779..c49a23031b 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -21,14 +21,16 @@ #include "VertexLoader_Normal.h" #include "VertexManagerBase.h" #include "CPUDetect.h" +#include -#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__) +#if _M_SSE >= 0x401 +#include +#include +#elif _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__) #include #endif -#define LOG_NORM8() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); -#define LOG_NORM16() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); -#define LOG_NORMF() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); +#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; @@ -118,98 +120,156 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, unsigned #define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14) // --- Direct --- - -inline void ReadDirectS8() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadS8() * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadS8() * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = DataReadS8() * S8FRAC; - VertexManager::s_pCurBufferPointer += 12; - 
LOG_NORM8(); -} - -inline void ReadDirectS16() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)DataReadU16()) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)DataReadU16()) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)DataReadU16()) * S16FRAC; - VertexManager::s_pCurBufferPointer += 12; - LOG_NORM16() -} - -inline void ReadDirectFloat() -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - ((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); - VertexManager::s_pCurBufferPointer += 12; - LOG_NORMF() -} - -inline void ReadIndirectS8(const s8* pData) +inline void ReadIndirectS8x3(const s8* pData) { ((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC; ((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC; ((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC; VertexManager::s_pCurBufferPointer += 12; - LOG_NORM8(); + LOG_NORM(); } -inline void ReadIndirectS16(const u16* pData) +inline void ReadIndirectS8x9(const s8* pData) +{ + ((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC; + LOG_NORM(); + ((float*)VertexManager::s_pCurBufferPointer)[3] = pData[3] * S8FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[4] = pData[4] * S8FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[5] = pData[5] * S8FRAC; + LOG_NORM(); + ((float*)VertexManager::s_pCurBufferPointer)[6] = pData[6] * S8FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[7] = pData[7] * S8FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[8] = pData[8] * S8FRAC; + LOG_NORM(); + VertexManager::s_pCurBufferPointer += 36; +} + +inline void ReadIndirectS16x3(const u16* pData) { ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * 
S16FRAC; ((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC; ((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC; VertexManager::s_pCurBufferPointer += 12; - LOG_NORM16() + LOG_NORM() } -inline void ReadIndirectFloat(const u32* pData) +inline void ReadIndirectS16x9(const u16* pData) +{ + ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC; + LOG_NORM() + ((float*)VertexManager::s_pCurBufferPointer)[3] = ((s16)Common::swap16(pData[3])) * S16FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[4] = ((s16)Common::swap16(pData[4])) * S16FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[5] = ((s16)Common::swap16(pData[5])) * S16FRAC; + LOG_NORM() + ((float*)VertexManager::s_pCurBufferPointer)[6] = ((s16)Common::swap16(pData[6])) * S16FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[7] = ((s16)Common::swap16(pData[7])) * S16FRAC; + ((float*)VertexManager::s_pCurBufferPointer)[8] = ((s16)Common::swap16(pData[8])) * S16FRAC; + LOG_NORM() + VertexManager::s_pCurBufferPointer += 36; +} + +inline void ReadIndirectFloatx3(const u32* pData) { ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); VertexManager::s_pCurBufferPointer += 12; - LOG_NORMF(); + LOG_NORM(); } +inline void ReadIndirectFloatx9(const u32* pData) +{ + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); + LOG_NORM(); + 
((u32*)VertexManager::s_pCurBufferPointer)[3] = Common::swap32(pData[3]); + ((u32*)VertexManager::s_pCurBufferPointer)[4] = Common::swap32(pData[4]); + ((u32*)VertexManager::s_pCurBufferPointer)[5] = Common::swap32(pData[5]); + LOG_NORM(); + ((u32*)VertexManager::s_pCurBufferPointer)[6] = Common::swap32(pData[6]); + ((u32*)VertexManager::s_pCurBufferPointer)[7] = Common::swap32(pData[7]); + ((u32*)VertexManager::s_pCurBufferPointer)[8] = Common::swap32(pData[8]); + LOG_NORM(); + VertexManager::s_pCurBufferPointer += 36; +} + +inline void ReadDirectS8x3() +{ + const s8* Source = (const s8*)DataGetPosition(); + ReadIndirectS8x3(Source); + DataSkip(3); +} + +inline void ReadDirectS8x9() +{ + const s8* Source = (const s8*)DataGetPosition(); + ReadIndirectS8x9(Source); + DataSkip(9); +} + +inline void ReadDirectS16x3() +{ + const u16* Source = (const u16*)DataGetPosition(); + ReadIndirectS16x3(Source); + DataSkip(6); +} + +inline void ReadDirectS16x9() +{ + const u16* Source = (const u16*)DataGetPosition(); + ReadIndirectS16x9(Source); + DataSkip(18); +} + +inline void ReadDirectFloatx3() +{ + const u32* Source = (const u32*)DataGetPosition(); + ReadIndirectFloatx3(Source); + DataSkip(12); +} + +inline void ReadDirectFloatx9() +{ + const u32* Source = (const u32*)DataGetPosition(); + ReadIndirectFloatx9(Source); + DataSkip(36); +} + + + void LOADERDECL VertexLoader_Normal::Normal_DirectByte() { - ReadDirectS8(); + ReadDirectS8x3(); } void LOADERDECL VertexLoader_Normal::Normal_DirectShort() { - ReadDirectS16(); + ReadDirectS16x3(); } void LOADERDECL VertexLoader_Normal::Normal_DirectFloat() { - ReadDirectFloat(); + ReadDirectFloatx3(); } void LOADERDECL VertexLoader_Normal::Normal_DirectByte3() { - for (int i = 0; i < 3; i++) - { - ReadDirectS8(); - } + ReadDirectS8x9(); } void LOADERDECL VertexLoader_Normal::Normal_DirectShort3() { - for (int i = 0; i < 3; i++) - { - ReadDirectS16(); - } + ReadDirectS16x9(); } void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3() 
{ - for (int i = 0; i < 3; i++) - { - ReadDirectFloat(); - } + ReadDirectFloatx9(); } @@ -219,51 +279,42 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte() { u8 Index = DataReadU8(); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8(pData); + ReadIndirectS8x3(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index8_Short() { u8 Index = DataReadU8(); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16(pData); + ReadIndirectS16x3(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index8_Float() { u8 Index = DataReadU8(); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloat(pData); + ReadIndirectFloatx3(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1() { u8 Index = DataReadU8(); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - for (int i = 0; i < 3; i++) - { - ReadIndirectS8((const s8*)(&pData[3 * i])); - } + ReadIndirectS8x9(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1() { u8 Index = DataReadU8(); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - for (int i = 0; i < 3; i++) - { - ReadIndirectS16((const u16*)(&pData[3 * i])); - } + ReadIndirectS16x9(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1() { u8 Index = DataReadU8(); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - for (int i = 0; i < 3; i++) - { - ReadIndirectFloat((const u32*)(&pData[3 * i])); - } + ReadIndirectFloatx9(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3() @@ -272,7 +323,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3() { u8 Index = DataReadU8(); const s8* 
pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i); - ReadIndirectS8(pData); + ReadIndirectS8x3(pData); } } @@ -283,7 +334,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3() { u8 Index = DataReadU8(); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); - ReadIndirectS16(pData); + ReadIndirectS16x3(pData); } } @@ -293,7 +344,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3() { u8 Index = DataReadU8(); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); - ReadIndirectFloat(pData); + ReadIndirectFloatx3(pData); } } @@ -305,53 +356,42 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte() { u16 Index = DataReadU16(); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8(pData); + ReadIndirectS8x3(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index16_Short() { u16 Index = DataReadU16(); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16(pData); + ReadIndirectS16x3(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index16_Float() { u16 Index = DataReadU16(); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloat(pData); + ReadIndirectFloatx3(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1() { u16 Index = DataReadU16(); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - for (int i = 0; i < 3; i++) - { - ReadIndirectS8((const s8 *)(&pData[3 * i])); - } + ReadIndirectS8x9(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1() { u16 Index = DataReadU16(); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index 
* arraystrides[ARRAY_NORMAL])); - - for (int i = 0; i < 3; i++) - { - ReadIndirectS16((const u16 *)(&pData[3 * i])); - } + ReadIndirectS16x9(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1() { u16 Index = DataReadU16(); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - - for (int i = 0; i < 3; i++) - { - ReadIndirectFloat((const u32 *)(&pData[3 * i])); - } + ReadIndirectFloatx9(pData); } void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3() @@ -360,7 +400,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3() { u16 Index = DataReadU16(); const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i); - ReadIndirectS8(pData); + ReadIndirectS8x3(pData); } } @@ -370,7 +410,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3() { u16 Index = DataReadU16(); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); - ReadIndirectS16(pData); + ReadIndirectS16x3(pData); } } @@ -380,6 +420,6 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3() { u16 Index = DataReadU16(); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); - ReadIndirectFloat(pData); + ReadIndirectFloatx3(pData); } } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index a497588312..48208b4aac 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -858,9 +858,18 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) // convert data and set the target texture as our new EFB g_renderer->ResetAPIState(); D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorReinterpretSurface()); + D3DVIEWPORT9 vp; + vp.X = 0; + vp.Y = 0; + vp.Width = 
g_renderer->GetFullTargetWidth(); + vp.Height = g_renderer->GetFullTargetHeight(); + vp.MinZ = 0.0; + vp.MaxZ = 1.0; + D3D::dev->SetViewport(&vp); + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); D3D::drawShadedTexQuad(FramebufferManager::GetEFBColorTexture(), &source, g_renderer->GetFullTargetWidth(), g_renderer->GetFullTargetHeight(), g_renderer->GetFullTargetWidth(), g_renderer->GetFullTargetHeight(), pixel_shader, VertexShaderCache::GetSimpleVertexShader(0)); FramebufferManager::SwapReinterpretTexture(); - D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface()); + D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); g_renderer->RestoreAPIState(); } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp index 4dbb5e806d..a26a8407b8 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp @@ -22,18 +22,22 @@ namespace VertexFormatConverter { + // These fracs are fixed according to format +#define S8FRAC 0.015625f; // 1.0f / (1U << 6) +#define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14) + void LoadNormal1_Byte(InputVertexData *dst, u8 *src) { - dst->normal[0].x = (float)(s8)src[0] / 128; - dst->normal[0].y = (float)(s8)src[1] / 128; - dst->normal[0].z = (float)(s8)src[2] / 128; + dst->normal[0].x = ((s8)src[0]) * S8FRAC; + dst->normal[0].y = ((s8)src[1]) * S8FRAC; + dst->normal[0].z = ((s8)src[2]) * S8FRAC; } void LoadNormal1_Short(InputVertexData *dst, u8 *src) { - dst->normal[0].x = (float)((s16*)src)[0] / 32768; - dst->normal[0].y = (float)((s16*)src)[1] / 32768; - dst->normal[0].z = (float)((s16*)src)[2] / 32768; + dst->normal[0].x = ((s16*)src)[0] * S16FRAC; + dst->normal[0].y = ((s16*)src)[1] * S16FRAC; + dst->normal[0].z = ((s16*)src)[2] * S16FRAC; } void LoadNormal1_Float(InputVertexData *dst, u8 *src) @@ -47,9 +51,9 @@ namespace 
VertexFormatConverter { for (int i = 0, j = 0; i < 3; i++, j+=3) { - dst->normal[i].x = (float)(s8)src[j + 0] / 128; - dst->normal[i].y = (float)(s8)src[j + 1] / 128; - dst->normal[i].z = (float)(s8)src[j + 2] / 128; + dst->normal[i].x = ((s8)src[j + 0]) * S8FRAC; + dst->normal[i].y = ((s8)src[j + 1]) * S8FRAC; + dst->normal[i].z = ((s8)src[j + 2]) * S8FRAC; } } @@ -57,9 +61,9 @@ namespace VertexFormatConverter { for (int i = 0, j = 0; i < 3; i++, j+=3) { - dst->normal[i].x = (float)((s16*)src)[j + 0] / 32768; - dst->normal[i].y = (float)((s16*)src)[j + 1] / 32768; - dst->normal[i].z = (float)((s16*)src)[j + 2] / 32768; + dst->normal[i].x = ((s16*)src)[j + 0] * S16FRAC; + dst->normal[i].y = ((s16*)src)[j + 1] * S16FRAC; + dst->normal[i].z = ((s16*)src)[j + 2] * S16FRAC; } }