Two fixes: one for the DX9 plugin and one for the software plugin.

Some small optimizations to normal loading.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6898 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2011-01-23 15:29:57 +00:00
parent 9749d617d0
commit 7cd7c2838b
3 changed files with 157 additions and 104 deletions

View File

@ -21,14 +21,16 @@
#include "VertexLoader_Normal.h"
#include "VertexManagerBase.h"
#include "CPUDetect.h"
#include <cmath>
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
#if _M_SSE >= 0x401
#include <smmintrin.h>
#include <emmintrin.h>
#elif _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
#include <tmmintrin.h>
#endif
#define LOG_NORM8() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_NORM16() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_NORMF() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
@ -118,98 +120,156 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, unsigned
#define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14)
// --- Direct ---
// Pulls three signed bytes from the vertex stream with DataReadS8(), scales each by S8FRAC
// and writes the floats at the current output pointer, which is then advanced by 12 bytes.
inline void ReadDirectS8()
{
    float* const out = (float*)VertexManager::s_pCurBufferPointer;
    for (int c = 0; c < 3; ++c)
    {
        out[c] = DataReadS8() * S8FRAC;
    }
    VertexManager::s_pCurBufferPointer += 12;
    LOG_NORM8();
}
// Pulls three 16-bit values from the vertex stream with DataReadU16(), reinterprets each as
// signed, scales by S16FRAC and writes the floats at the current output pointer (advanced by 12 bytes).
inline void ReadDirectS16()
{
    float* const out = (float*)VertexManager::s_pCurBufferPointer;
    for (int c = 0; c < 3; ++c)
    {
        out[c] = ((s16)DataReadU16()) * S16FRAC;
    }
    VertexManager::s_pCurBufferPointer += 12;
    LOG_NORM16()
}
inline void ReadDirectFloat()
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF()
}
// Converts the three signed bytes at pData to floats (each multiplied by S8FRAC), stores them
// at VertexManager::s_pCurBufferPointer and advances that pointer by 12 bytes.
// NOTE(review): two signature lines (ReadIndirectS8 and ReadIndirectS8x3) and two log macro
// lines are listed for one body — presumably the old and new names of the same helper; confirm
// which one is current.
inline void ReadIndirectS8(const s8* pData)
inline void ReadIndirectS8x3(const s8* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM8();
LOG_NORM();
}
// Converts nine consecutive signed bytes at pData to floats (each multiplied by S8FRAC), stores
// them at VertexManager::s_pCurBufferPointer and advances that pointer by 36 bytes.
// NOTE(review): the first signature line below (ReadIndirectS16, taking u16*) does not match
// this s8 body — it looks like a leftover from the previous version of the listing; confirm.
inline void ReadIndirectS16(const u16* pData)
inline void ReadIndirectS8x9(const s8* pData)
{
// first group of three components
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
LOG_NORM();
// second group of three components
((float*)VertexManager::s_pCurBufferPointer)[3] = pData[3] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[4] = pData[4] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[5] = pData[5] * S8FRAC;
LOG_NORM();
// third group of three components
((float*)VertexManager::s_pCurBufferPointer)[6] = pData[6] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[7] = pData[7] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[8] = pData[8] * S8FRAC;
LOG_NORM();
// the output pointer is advanced once, after all nine floats are written
VertexManager::s_pCurBufferPointer += 36;
}
// Byte-swaps the three 16-bit values at pData with Common::swap16, treats each as signed,
// scales by S16FRAC and writes the floats at the current output pointer (advanced by 12 bytes).
inline void ReadIndirectS16x3(const u16* pData)
{
    float* const out = (float*)VertexManager::s_pCurBufferPointer;
    for (int c = 0; c < 3; ++c)
    {
        out[c] = ((s16)Common::swap16(pData[c])) * S16FRAC;
    }
    VertexManager::s_pCurBufferPointer += 12;
    LOG_NORM16()
    LOG_NORM()
}
// Byte-swaps nine consecutive 16-bit values at pData (Common::swap16), treats each as signed,
// multiplies by S16FRAC and stores the floats at VertexManager::s_pCurBufferPointer, which is
// then advanced by 36 bytes.
// NOTE(review): the first signature line below (ReadIndirectFloat, taking u32*) does not match
// this 16-bit body — it looks like a leftover from the previous version of the listing; confirm.
inline void ReadIndirectFloat(const u32* pData)
inline void ReadIndirectS16x9(const u16* pData)
{
// first group of three components
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
LOG_NORM()
// second group of three components
((float*)VertexManager::s_pCurBufferPointer)[3] = ((s16)Common::swap16(pData[3])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[4] = ((s16)Common::swap16(pData[4])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[5] = ((s16)Common::swap16(pData[5])) * S16FRAC;
LOG_NORM()
// third group of three components
((float*)VertexManager::s_pCurBufferPointer)[6] = ((s16)Common::swap16(pData[6])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[7] = ((s16)Common::swap16(pData[7])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[8] = ((s16)Common::swap16(pData[8])) * S16FRAC;
LOG_NORM()
// the output pointer is advanced once, after all nine floats are written
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadIndirectFloatx3(const u32* pData)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
LOG_NORM();
}
inline void ReadIndirectFloatx9(const u32* pData)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
LOG_NORM();
((u32*)VertexManager::s_pCurBufferPointer)[3] = Common::swap32(pData[3]);
((u32*)VertexManager::s_pCurBufferPointer)[4] = Common::swap32(pData[4]);
((u32*)VertexManager::s_pCurBufferPointer)[5] = Common::swap32(pData[5]);
LOG_NORM();
((u32*)VertexManager::s_pCurBufferPointer)[6] = Common::swap32(pData[6]);
((u32*)VertexManager::s_pCurBufferPointer)[7] = Common::swap32(pData[7]);
((u32*)VertexManager::s_pCurBufferPointer)[8] = Common::swap32(pData[8]);
LOG_NORM();
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadDirectS8x3()
{
const s8* Source = (const s8*)DataGetPosition();
ReadIndirectS8x3(Source);
DataSkip(3);
}
inline void ReadDirectS8x9()
{
const s8* Source = (const s8*)DataGetPosition();
ReadIndirectS8x9(Source);
DataSkip(9);
}
inline void ReadDirectS16x3()
{
const u16* Source = (const u16*)DataGetPosition();
ReadIndirectS16x3(Source);
DataSkip(6);
}
inline void ReadDirectS16x9()
{
const u16* Source = (const u16*)DataGetPosition();
ReadIndirectS16x9(Source);
DataSkip(18);
}
inline void ReadDirectFloatx3()
{
const u32* Source = (const u32*)DataGetPosition();
ReadIndirectFloatx3(Source);
DataSkip(12);
}
inline void ReadDirectFloatx9()
{
const u32* Source = (const u32*)DataGetPosition();
ReadIndirectFloatx9(Source);
DataSkip(36);
}
// Loader for a direct (non-indexed) byte normal.
// NOTE(review): both ReadDirectS8() and ReadDirectS8x3() are listed; each consumes three stream
// bytes and emits three floats, so presumably only one of the two calls is meant to remain — confirm.
void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
{
ReadDirectS8();
ReadDirectS8x3();
}
// Loader for a direct (non-indexed) 16-bit normal.
// NOTE(review): both ReadDirectS16() and ReadDirectS16x3() are listed; each consumes six stream
// bytes and emits three floats, so presumably only one of the two calls is meant to remain — confirm.
void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
{
ReadDirectS16();
ReadDirectS16x3();
}
// Loader for a direct (non-indexed) float normal.
// NOTE(review): both ReadDirectFloat() and ReadDirectFloatx3() are listed; each consumes twelve
// stream bytes and emits three words, so presumably only one of the two calls is meant to remain — confirm.
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat()
{
ReadDirectFloat();
ReadDirectFloatx3();
}
// Loader for three direct (non-indexed) byte normals.
// NOTE(review): the three-iteration ReadDirectS8() loop and the single ReadDirectS8x9() call both
// consume nine stream bytes; presumably the loop is the old form and the x9 call its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
{
for (int i = 0; i < 3; i++)
{
ReadDirectS8();
}
ReadDirectS8x9();
}
// Loader for three direct (non-indexed) 16-bit normals.
// NOTE(review): the three-iteration ReadDirectS16() loop and the single ReadDirectS16x9() call both
// consume eighteen stream bytes; presumably the loop is the old form and the x9 call its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
{
for (int i = 0; i < 3; i++)
{
ReadDirectS16();
}
ReadDirectS16x9();
}
// Loader for three direct (non-indexed) float normals.
// NOTE(review): the three-iteration ReadDirectFloat() loop and the single ReadDirectFloatx9() call both
// consume thirty-six stream bytes; presumably the loop is the old form and the x9 call its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
{
for (int i = 0; i < 3; i++)
{
ReadDirectFloat();
}
ReadDirectFloatx9();
}
@ -219,51 +279,42 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
{
u8 Index = DataReadU8();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS8(pData);
ReadIndirectS8x3(pData);
}
// Reads an 8-bit index from the stream, addresses the ARRAY_NORMAL entry at
// base + Index * stride and converts one 16-bit normal from it.
// NOTE(review): both ReadIndirectS16 and ReadIndirectS16x3 are called on the same pData;
// presumably these are the old and new names of one call — confirm only one remains.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
{
u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS16(pData);
ReadIndirectS16x3(pData);
}
// Reads an 8-bit index from the stream, addresses the ARRAY_NORMAL entry at
// base + Index * stride and copies one float normal from it.
// NOTE(review): both ReadIndirectFloat and ReadIndirectFloatx3 are called on the same pData;
// presumably these are the old and new names of one call — confirm only one remains.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
{
u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectFloat(pData);
ReadIndirectFloatx3(pData);
}
// Reads a single 8-bit index from the stream and converts three byte normals stored
// consecutively at the ARRAY_NORMAL entry base + Index * stride.
// NOTE(review): the per-normal ReadIndirectS8 loop and the single ReadIndirectS8x9 call cover
// the same nine source bytes; presumably the loop is the old form and x9 its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
{
u8 Index = DataReadU8();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
ReadIndirectS8((const s8*)(&pData[3 * i]));
}
ReadIndirectS8x9(pData);
}
// Reads a single 8-bit index from the stream and converts three 16-bit normals stored
// consecutively at the ARRAY_NORMAL entry base + Index * stride.
// NOTE(review): the per-normal ReadIndirectS16 loop and the single ReadIndirectS16x9 call cover
// the same nine source values; presumably the loop is the old form and x9 its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
{
u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
ReadIndirectS16((const u16*)(&pData[3 * i]));
}
ReadIndirectS16x9(pData);
}
// Reads a single 8-bit index from the stream and copies three float normals stored
// consecutively at the ARRAY_NORMAL entry base + Index * stride.
// NOTE(review): the per-normal ReadIndirectFloat loop and the single ReadIndirectFloatx9 call cover
// the same nine source words; presumably the loop is the old form and x9 its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
{
u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
ReadIndirectFloat((const u32*)(&pData[3 * i]));
}
ReadIndirectFloatx9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
@ -272,7 +323,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
{
u8 Index = DataReadU8();
const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i);
ReadIndirectS8(pData);
ReadIndirectS8x3(pData);
}
}
@ -283,7 +334,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
{
u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
ReadIndirectS16(pData);
ReadIndirectS16x3(pData);
}
}
@ -293,7 +344,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
{
u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i);
ReadIndirectFloat(pData);
ReadIndirectFloatx3(pData);
}
}
@ -305,53 +356,42 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
{
u16 Index = DataReadU16();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS8(pData);
ReadIndirectS8x3(pData);
}
// Reads a 16-bit index from the stream, addresses the ARRAY_NORMAL entry at
// base + Index * stride and converts one 16-bit normal from it.
// NOTE(review): both ReadIndirectS16 and ReadIndirectS16x3 are called on the same pData;
// presumably these are the old and new names of one call — confirm only one remains.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS16(pData);
ReadIndirectS16x3(pData);
}
// Reads a 16-bit index from the stream, addresses the ARRAY_NORMAL entry at
// base + Index * stride and copies one float normal from it.
// NOTE(review): both ReadIndirectFloat and ReadIndirectFloatx3 are called on the same pData;
// presumably these are the old and new names of one call — confirm only one remains.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
{
u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectFloat(pData);
ReadIndirectFloatx3(pData);
}
// Reads a single 16-bit index from the stream and converts three byte normals stored
// consecutively at the ARRAY_NORMAL entry base + Index * stride.
// NOTE(review): the per-normal ReadIndirectS8 loop and the single ReadIndirectS8x9 call cover
// the same nine source bytes; presumably the loop is the old form and x9 its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
{
u16 Index = DataReadU16();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
ReadIndirectS8((const s8 *)(&pData[3 * i]));
}
ReadIndirectS8x9(pData);
}
// Reads a single 16-bit index from the stream and converts three 16-bit normals stored
// consecutively at the ARRAY_NORMAL entry base + Index * stride.
// NOTE(review): the per-normal ReadIndirectS16 loop and the single ReadIndirectS16x9 call cover
// the same nine source values; presumably the loop is the old form and x9 its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
ReadIndirectS16((const u16 *)(&pData[3 * i]));
}
ReadIndirectS16x9(pData);
}
// Reads a single 16-bit index from the stream and copies three float normals stored
// consecutively at the ARRAY_NORMAL entry base + Index * stride.
// NOTE(review): the per-normal ReadIndirectFloat loop and the single ReadIndirectFloatx9 call cover
// the same nine source words; presumably the loop is the old form and x9 its replacement — confirm.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
{
u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
ReadIndirectFloat((const u32 *)(&pData[3 * i]));
}
ReadIndirectFloatx9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
@ -360,7 +400,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
{
u16 Index = DataReadU16();
const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i);
ReadIndirectS8(pData);
ReadIndirectS8x3(pData);
}
}
@ -370,7 +410,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
ReadIndirectS16(pData);
ReadIndirectS16x3(pData);
}
}
@ -380,6 +420,6 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
{
u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i);
ReadIndirectFloat(pData);
ReadIndirectFloatx3(pData);
}
}

View File

@ -858,9 +858,18 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
// convert data and set the target texture as our new EFB
g_renderer->ResetAPIState();
D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorReinterpretSurface());
D3DVIEWPORT9 vp;
vp.X = 0;
vp.Y = 0;
vp.Width = g_renderer->GetFullTargetWidth();
vp.Height = g_renderer->GetFullTargetHeight();
vp.MinZ = 0.0;
vp.MaxZ = 1.0;
D3D::dev->SetViewport(&vp);
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
D3D::drawShadedTexQuad(FramebufferManager::GetEFBColorTexture(), &source, g_renderer->GetFullTargetWidth(), g_renderer->GetFullTargetHeight(), g_renderer->GetFullTargetWidth(), g_renderer->GetFullTargetHeight(), pixel_shader, VertexShaderCache::GetSimpleVertexShader(0));
FramebufferManager::SwapReinterpretTexture();
D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface());
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
g_renderer->RestoreAPIState();
}

View File

@ -22,18 +22,22 @@
namespace VertexFormatConverter
{
// These fractions are fixed according to the format
#define S8FRAC 0.015625f; // 1.0f / (1U << 6)
#define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14)
// Converts the three signed bytes at src into the x/y/z of dst->normal[0],
// multiplying each by S8FRAC.
void LoadNormal1_Byte(InputVertexData *dst, u8 *src)
{
    // The earlier "/ 128" assignments were overwritten immediately by these
    // stores and had no effect, so only the S8FRAC conversion is kept.
    dst->normal[0].x = ((s8)src[0]) * S8FRAC;
    dst->normal[0].y = ((s8)src[1]) * S8FRAC;
    dst->normal[0].z = ((s8)src[2]) * S8FRAC;
}
// Converts the three 16-bit signed values at src into the x/y/z of dst->normal[0],
// multiplying each by S16FRAC. src is read through an s16 pointer as-is (no byte swap here).
void LoadNormal1_Short(InputVertexData *dst, u8 *src)
{
    // The earlier "/ 32768" assignments were overwritten immediately by these
    // stores and had no effect, so only the S16FRAC conversion is kept.
    dst->normal[0].x = ((s16*)src)[0] * S16FRAC;
    dst->normal[0].y = ((s16*)src)[1] * S16FRAC;
    dst->normal[0].z = ((s16*)src)[2] * S16FRAC;
}
void LoadNormal1_Float(InputVertexData *dst, u8 *src)
@ -47,9 +51,9 @@ namespace VertexFormatConverter
{
for (int i = 0, j = 0; i < 3; i++, j+=3)
{
dst->normal[i].x = (float)(s8)src[j + 0] / 128;
dst->normal[i].y = (float)(s8)src[j + 1] / 128;
dst->normal[i].z = (float)(s8)src[j + 2] / 128;
dst->normal[i].x = ((s8)src[j + 0]) * S8FRAC;
dst->normal[i].y = ((s8)src[j + 1]) * S8FRAC;
dst->normal[i].z = ((s8)src[j + 2]) * S8FRAC;
}
}
@ -57,9 +61,9 @@ namespace VertexFormatConverter
{
for (int i = 0, j = 0; i < 3; i++, j+=3)
{
dst->normal[i].x = (float)((s16*)src)[j + 0] / 32768;
dst->normal[i].y = (float)((s16*)src)[j + 1] / 32768;
dst->normal[i].z = (float)((s16*)src)[j + 2] / 32768;
dst->normal[i].x = ((s16*)src)[j + 0] * S16FRAC;
dst->normal[i].y = ((s16*)src)[j + 1] * S16FRAC;
dst->normal[i].z = ((s16*)src)[j + 2] * S16FRAC;
}
}