GL Vertex loader moved to VideoCommon.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1692 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard
2008-12-26 13:09:16 +00:00
parent ab01e9e853
commit 95a341a4e9
15 changed files with 55 additions and 115 deletions

View File

@ -0,0 +1,682 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <assert.h>
#include "Common.h"
#include "VideoCommon.h"
#include "Profiler.h"
#include "MemoryUtil.h"
#include "StringUtil.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "LookUpTables.h"
#include "Statistics.h"
#include "VertexLoaderManager.h"
#include "VertexLoader.h"
#include "BPMemory.h"
#include "DataReader.h"
#include "NativeVertexWriter.h"
#include "VertexLoader_Position.h"
#include "VertexLoader_Normal.h"
#include "VertexLoader_Color.h"
#include "VertexLoader_TextCoord.h"
#define USE_JIT
#define COMPILED_CODE_SIZE 4096
NativeVertexFormat *g_nativeVertexFmt;
#ifndef _WIN32
#undef inline
#define inline
#endif
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
// when decoding each vertex.
static u8 s_curposmtx;
static u8 s_curtexmtx[8];
static int s_texmtxwrite = 0;
static int s_texmtxread = 0;
static int loop_counter;
// Vertex loaders read these. Although the scale ones should be baked into the shader.
int tcIndex;
int colIndex;
TVtxAttr* pVtxAttr;
int colElements[2];
float posScale;
float tcScale[8];
using namespace Gen;
void LOADERDECL PosMtx_ReadDirect_UByte()
{
s_curposmtx = DataReadU8() & 0x3f;
PRIM_LOG("posmtx: %d, ", s_curposmtx);
}
void LOADERDECL PosMtx_Write()
{
*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
*VertexManager::s_pCurBufferPointer++ = 0;
*VertexManager::s_pCurBufferPointer++ = 0;
*VertexManager::s_pCurBufferPointer++ = 0;
}
void LOADERDECL TexMtx_ReadDirect_UByte()
{
s_curtexmtx[s_texmtxread] = DataReadU8()&0x3f;
PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]);
s_texmtxread++;
}
void LOADERDECL TexMtx_Write_Float()
{
*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
VertexManager::s_pCurBufferPointer += 4;
}
void LOADERDECL TexMtx_Write_Float2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
VertexManager::s_pCurBufferPointer += 8;
}
void LOADERDECL TexMtx_Write_Short3()
{
((s16*)VertexManager::s_pCurBufferPointer)[0] = 0;
((s16*)VertexManager::s_pCurBufferPointer)[1] = 0;
((s16*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
VertexManager::s_pCurBufferPointer += 8;
}
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
{
m_compiledCode = NULL;
m_numLoadedVertices = 0;
m_VertexSize = 0;
m_numPipelineStages = 0;
m_NativeFmt = NativeVertexFormat::Create();
loop_counter = 0;
VertexLoader_Normal::Init();
m_VtxDesc = vtx_desc;
SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
AllocCodeSpace(COMPILED_CODE_SIZE);
CompileVertexTranslator();
WriteProtect();
}
VertexLoader::~VertexLoader()
{
FreeCodeSpace();
delete m_NativeFmt;
}
void VertexLoader::CompileVertexTranslator()
{
m_VertexSize = 0;
const TVtxAttr &vtx_attr = m_VtxAttr;
#ifdef USE_JIT
if (m_compiledCode)
PanicAlert("trying to recompile a vtx translator");
m_compiledCode = GetCodePtr();
ABI_EmitPrologue(4);
// Start loop here
const u8 *loop_start = GetCodePtr();
// Reset component counters if present in vertex format only.
if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord ||
m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) {
MOV(32, M(&tcIndex), Imm32(0));
}
if (m_VtxDesc.Color0 || m_VtxDesc.Color1) {
MOV(32, M(&colIndex), Imm32(0));
}
if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx ||
m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) {
MOV(32, M(&s_texmtxwrite), Imm32(0));
MOV(32, M(&s_texmtxread), Imm32(0));
}
#endif
// Colors
const int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
// TextureCoord
// Since m_VtxDesc.Text7Coord is broken across a 32 bit word boundary, retrieve its value manually.
// If we didn't do this, the vertex format would be read as one bit offset from where it should be, making
// 01 become 00, and 10/11 become 01
const int tc[8] = {
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, (m_VtxDesc.Hex >> 31) & 3
};
// Reset pipeline
m_numPipelineStages = 0;
// It's a bit ugly that we poke inside m_NativeFmt in this function. Planning to fix this.
m_NativeFmt->m_components = 0;
// Position in pc vertex format.
int nat_offset = 0;
PortableVertexDeclaration vtx_decl;
memset(&vtx_decl, 0, sizeof(vtx_decl));
for (int i = 0; i < 8; i++) {
vtx_decl.texcoord_offset[i] = -1;
}
// m_VBVertexStride for texmtx and posmtx is computed later when writing.
// Position Matrix Index
if (m_VtxDesc.PosMatIdx) {
WriteCall(PosMtx_ReadDirect_UByte);
m_NativeFmt->m_components |= VB_HAS_POSMTXIDX;
m_VertexSize += 1;
}
if (m_VtxDesc.Tex0MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX0; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex1MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX1; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex2MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX2; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex3MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX3; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex4MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX4; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex5MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX5; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex6MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX6; WriteCall(TexMtx_ReadDirect_UByte); }
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
switch (m_VtxDesc.Position) {
case NOT_PRESENT: {_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break;
case DIRECT:
switch (m_VtxAttr.PosFormat) {
case FORMAT_UBYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_UByte); break;
case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_Byte); break;
case FORMAT_USHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_UShort); break;
case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_Short); break;
case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; WriteCall(Pos_ReadDirect_Float); break;
default: _assert_(0); break;
}
nat_offset += 12;
break;
case INDEX8:
switch (m_VtxAttr.PosFormat) {
case FORMAT_UBYTE: WriteCall(Pos_ReadIndex8_UByte); break; //WTF?
case FORMAT_BYTE: WriteCall(Pos_ReadIndex8_Byte); break;
case FORMAT_USHORT: WriteCall(Pos_ReadIndex8_UShort); break;
case FORMAT_SHORT: WriteCall(Pos_ReadIndex8_Short); break;
case FORMAT_FLOAT: WriteCall(Pos_ReadIndex8_Float); break;
default: _assert_(0); break;
}
m_VertexSize += 1;
nat_offset += 12;
break;
case INDEX16:
switch (m_VtxAttr.PosFormat) {
case FORMAT_UBYTE: WriteCall(Pos_ReadIndex16_UByte); break;
case FORMAT_BYTE: WriteCall(Pos_ReadIndex16_Byte); break;
case FORMAT_USHORT: WriteCall(Pos_ReadIndex16_UShort); break;
case FORMAT_SHORT: WriteCall(Pos_ReadIndex16_Short); break;
case FORMAT_FLOAT: WriteCall(Pos_ReadIndex16_Float); break;
default: _assert_(0); break;
}
m_VertexSize += 2;
nat_offset += 12;
break;
}
// Normals
vtx_decl.num_normals = 0;
if (m_VtxDesc.Normal != NOT_PRESENT) {
m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
if (pFunc == 0)
{
char temp[256];
sprintf(temp,"%i %i %i %i", m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
g_VideoInitialize.pSysMessage("VertexLoader_Normal::GetFunction returned zero!");
}
WriteCall(pFunc);
vtx_decl.num_normals = vtx_attr.NormalElements ? 3 : 1;
switch (vtx_attr.NormalFormat) {
case FORMAT_UBYTE:
case FORMAT_BYTE:
vtx_decl.normal_gl_type = VAR_BYTE;
vtx_decl.normal_gl_size = 4;
vtx_decl.normal_offset[0] = nat_offset;
nat_offset += 4;
if (vtx_attr.NormalElements) {
vtx_decl.normal_offset[1] = nat_offset;
nat_offset += 4;
vtx_decl.normal_offset[2] = nat_offset;
nat_offset += 4;
}
break;
case FORMAT_USHORT:
case FORMAT_SHORT:
vtx_decl.normal_gl_type = VAR_SHORT;
vtx_decl.normal_gl_size = 4;
vtx_decl.normal_offset[0] = nat_offset;
nat_offset += 8;
if (vtx_attr.NormalElements) {
vtx_decl.normal_offset[1] = nat_offset;
nat_offset += 8;
vtx_decl.normal_offset[2] = nat_offset;
nat_offset += 8;
}
break;
case FORMAT_FLOAT:
vtx_decl.normal_gl_type = VAR_FLOAT;
vtx_decl.normal_gl_size = 3;
vtx_decl.normal_offset[0] = nat_offset;
nat_offset += 12;
if (vtx_attr.NormalElements) {
vtx_decl.normal_offset[1] = nat_offset;
nat_offset += 12;
vtx_decl.normal_offset[2] = nat_offset;
nat_offset += 12;
}
break;
default: _assert_(0); break;
}
int numNormals = (m_VtxAttr.NormalElements == 1) ? NRM_THREE : NRM_ONE;
m_NativeFmt->m_components |= VB_HAS_NRM0;
if (numNormals == NRM_THREE)
m_NativeFmt->m_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}
vtx_decl.color_gl_type = VAR_UNSIGNED_BYTE;
for (int i = 0; i < 2; i++) {
m_NativeFmt->m_components |= VB_HAS_COL0 << i;
switch (col[i])
{
case NOT_PRESENT:
m_NativeFmt->m_components &= ~(VB_HAS_COL0 << i);
vtx_decl.color_offset[i] = -1;
break;
case DIRECT:
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_565); break;
case FORMAT_24B_888: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_888); break;
case FORMAT_32B_888x: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_888x); break;
case FORMAT_16B_4444: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_4444); break;
case FORMAT_24B_6666: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_6666); break;
case FORMAT_32B_8888: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_8888); break;
default: _assert_(0); break;
}
break;
case INDEX8:
m_VertexSize += 1;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565: WriteCall(Color_ReadIndex8_16b_565); break;
case FORMAT_24B_888: WriteCall(Color_ReadIndex8_24b_888); break;
case FORMAT_32B_888x: WriteCall(Color_ReadIndex8_32b_888x); break;
case FORMAT_16B_4444: WriteCall(Color_ReadIndex8_16b_4444); break;
case FORMAT_24B_6666: WriteCall(Color_ReadIndex8_24b_6666); break;
case FORMAT_32B_8888: WriteCall(Color_ReadIndex8_32b_8888); break;
default: _assert_(0); break;
}
break;
case INDEX16:
m_VertexSize += 2;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565: WriteCall(Color_ReadIndex16_16b_565); break;
case FORMAT_24B_888: WriteCall(Color_ReadIndex16_24b_888); break;
case FORMAT_32B_888x: WriteCall(Color_ReadIndex16_32b_888x); break;
case FORMAT_16B_4444: WriteCall(Color_ReadIndex16_16b_4444); break;
case FORMAT_24B_6666: WriteCall(Color_ReadIndex16_24b_6666); break;
case FORMAT_32B_8888: WriteCall(Color_ReadIndex16_32b_8888); break;
default: _assert_(0); break;
}
break;
}
// Common for the three bottom cases
if (col[i] != NOT_PRESENT) {
vtx_decl.color_offset[i] = nat_offset;
nat_offset += 4;
}
}
// Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
for (int i = 0; i < 8; i++) {
m_NativeFmt->m_components |= VB_HAS_UV0 << i;
int elements = m_VtxAttr.texCoord[i].Elements;
switch (tc[i])
{
case NOT_PRESENT:
m_NativeFmt->m_components &= ~(VB_HAS_UV0 << i);
break;
case DIRECT:
switch (m_VtxAttr.texCoord[i].Format)
{
case FORMAT_UBYTE: m_VertexSize += elements?2:1; WriteCall(elements?TexCoord_ReadDirect_UByte2:TexCoord_ReadDirect_UByte1); break;
case FORMAT_BYTE: m_VertexSize += elements?2:1; WriteCall(elements?TexCoord_ReadDirect_Byte2:TexCoord_ReadDirect_Byte1); break;
case FORMAT_USHORT: m_VertexSize += elements?4:2; WriteCall(elements?TexCoord_ReadDirect_UShort2:TexCoord_ReadDirect_UShort1); break;
case FORMAT_SHORT: m_VertexSize += elements?4:2; WriteCall(elements?TexCoord_ReadDirect_Short2:TexCoord_ReadDirect_Short1); break;
case FORMAT_FLOAT: m_VertexSize += elements?8:4; WriteCall(elements?TexCoord_ReadDirect_Float2:TexCoord_ReadDirect_Float1); break;
default: _assert_(0); break;
}
break;
case INDEX8:
m_VertexSize += 1;
switch (m_VtxAttr.texCoord[i].Format)
{
case FORMAT_UBYTE: WriteCall(elements?TexCoord_ReadIndex8_UByte2:TexCoord_ReadIndex8_UByte1); break;
case FORMAT_BYTE: WriteCall(elements?TexCoord_ReadIndex8_Byte2:TexCoord_ReadIndex8_Byte1); break;
case FORMAT_USHORT: WriteCall(elements?TexCoord_ReadIndex8_UShort2:TexCoord_ReadIndex8_UShort1); break;
case FORMAT_SHORT: WriteCall(elements?TexCoord_ReadIndex8_Short2:TexCoord_ReadIndex8_Short1); break;
case FORMAT_FLOAT: WriteCall(elements?TexCoord_ReadIndex8_Float2:TexCoord_ReadIndex8_Float1); break;
default: _assert_(0); break;
}
break;
case INDEX16:
m_VertexSize += 2;
switch (m_VtxAttr.texCoord[i].Format)
{
case FORMAT_UBYTE: WriteCall(elements?TexCoord_ReadIndex16_UByte2:TexCoord_ReadIndex16_UByte1); break;
case FORMAT_BYTE: WriteCall(elements?TexCoord_ReadIndex16_Byte2:TexCoord_ReadIndex16_Byte1); break;
case FORMAT_USHORT: WriteCall(elements?TexCoord_ReadIndex16_UShort2:TexCoord_ReadIndex16_UShort1); break;
case FORMAT_SHORT: WriteCall(elements?TexCoord_ReadIndex16_Short2:TexCoord_ReadIndex16_Short1); break;
case FORMAT_FLOAT: WriteCall(elements?TexCoord_ReadIndex16_Float2:TexCoord_ReadIndex16_Float1); break;
default: _assert_(0);
}
break;
}
if (m_NativeFmt->m_components & (VB_HAS_TEXMTXIDX0 << i)) {
if (tc[i] != NOT_PRESENT) {
// if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
vtx_decl.texcoord_offset[i] = nat_offset;
vtx_decl.texcoord_gl_type[i] = VAR_FLOAT;
vtx_decl.texcoord_size[i] = 3;
nat_offset += 12;
WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
}
else {
m_NativeFmt->m_components |= VB_HAS_UV0 << i; // have to include since using now
vtx_decl.texcoord_offset[i] = nat_offset;
vtx_decl.texcoord_gl_type[i] = VAR_SHORT;
vtx_decl.texcoord_size[i] = 4;
nat_offset += 8; // still include the texture coordinate, but this time as 6 + 2 bytes
WriteCall(TexMtx_Write_Short3);
}
}
else {
if (tc[i] != NOT_PRESENT) {
vtx_decl.texcoord_offset[i] = nat_offset;
vtx_decl.texcoord_gl_type[i] = VAR_FLOAT;
vtx_decl.texcoord_size[i] = vtx_attr.texCoord[i].Elements ? 2 : 1;
nat_offset += 4 * (vtx_attr.texCoord[i].Elements ? 2 : 1);
} else {
vtx_decl.texcoord_offset[i] = -1;
}
}
if (tc[i] == NOT_PRESENT) {
// if there's more tex coords later, have to write a dummy call
int j = i + 1;
for (; j < 8; ++j) {
if (tc[j] != NOT_PRESENT) {
WriteCall(TexCoord_Read_Dummy); // important to get indices right!
break;
}
}
// tricky!
if (j == 8 && !((m_NativeFmt->m_components & VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL << (i + 1)))) {
// no more tex coords and tex matrices, so exit loop
break;
}
}
}
if (m_VtxDesc.PosMatIdx) {
WriteCall(PosMtx_Write);
vtx_decl.posmtx_offset = nat_offset;
nat_offset += 4;
} else {
vtx_decl.posmtx_offset = -1;
}
native_stride = nat_offset;
vtx_decl.stride = native_stride;
#ifdef USE_JIT
// End loop here
SUB(32, M(&loop_counter), Imm8(1));
//SUB(32, R(EBX), Imm8(1));
J_CC(CC_NZ, loop_start, true);
ABI_EmitEpilogue(4);
#endif
m_NativeFmt->Initialize(vtx_decl);
}
void VertexLoader::WriteCall(TPipelineFunction func)
{
#ifdef USE_JIT
CALL((void*)func);
#else
m_PipelineStages[m_numPipelineStages++] = func;
#endif
}
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{
DVSTARTPROFILE();
m_numLoadedVertices += count;
// Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
{
VertexManager::Flush();
// Also move the Set() here?
}
g_nativeVertexFmt = m_NativeFmt;
if (bpmem.genMode.cullmode == 3 && primitive < 5)
{
// if cull mode is none, ignore triangles and quads
DataSkip(count * m_VertexSize);
return;
}
VertexManager::EnableComponents(m_NativeFmt->m_components);
// Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
pVtxAttr = &m_VtxAttr;
posScale = shiftLookup[m_VtxAttr.PosFrac];
if (m_NativeFmt->m_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++)
tcScale[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
int granularity = 1;
switch (primitive) {
case 3: // strip .. hm, weird
case 4: // fan
if (VertexManager::GetRemainingSize() < 3 * native_stride)
VertexManager::Flush();
break;
case 6: // line strip
if (VertexManager::GetRemainingSize() < 2 * native_stride)
VertexManager::Flush();
break;
case 0: granularity = 4; break; // quads
case 2: granularity = 3; break; // tris
case 5: granularity = 2; break; // lines
}
int startv = 0, extraverts = 0;
int v = 0;
while (v < count)
{
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
if (remainingVerts < granularity) {
INCSTAT(stats.thisFrame.numBufferSplits);
// This buffer full - break current primitive and flush, to switch to the next buffer.
u8* plastptr = VertexManager::s_pCurBufferPointer;
if (v - startv > 0)
VertexManager::AddVertices(primitive, v - startv + extraverts);
VertexManager::Flush();
// Why does this need to be so complicated?
switch (primitive) {
case 3: // triangle strip, copy last two vertices
// a little trick since we have to keep track of signs
if (v & 1) {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
VertexManager::s_pCurBufferPointer += native_stride*3;
extraverts = 3;
}
else {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
VertexManager::s_pCurBufferPointer += native_stride*2;
extraverts = 2;
}
break;
case 4: // tri fan, copy first and last vert
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 2;
break;
case 6: // line strip
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 1;
break;
default:
extraverts = 0;
break;
}
startv = v;
}
int remainingPrims = remainingVerts / granularity;
remainingVerts = remainingPrims * granularity;
if (count - v < remainingVerts)
remainingVerts = count - v;
#ifdef USE_JIT
if (remainingVerts > 0) {
loop_counter = remainingVerts;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < remainingVerts; s++)
{
tcIndex = 0;
colIndex = 0;
s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i]();
PRIM_LOG("\n");
}
#endif
v += remainingVerts;
}
if (startv < count)
VertexManager::AddVertices(primitive, count - startv + extraverts);
}
void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2)
{
VAT vat;
vat.g0.Hex = _group0;
vat.g1.Hex = _group1;
vat.g2.Hex = _group2;
m_VtxAttr.PosElements = vat.g0.PosElements;
m_VtxAttr.PosFormat = vat.g0.PosFormat;
m_VtxAttr.PosFrac = vat.g0.PosFrac;
m_VtxAttr.NormalElements = vat.g0.NormalElements;
m_VtxAttr.NormalFormat = vat.g0.NormalFormat;
m_VtxAttr.color[0].Elements = vat.g0.Color0Elements;
m_VtxAttr.color[0].Comp = vat.g0.Color0Comp;
m_VtxAttr.color[1].Elements = vat.g0.Color1Elements;
m_VtxAttr.color[1].Comp = vat.g0.Color1Comp;
m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements;
m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat;
m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac;
m_VtxAttr.ByteDequant = vat.g0.ByteDequant;
m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3;
m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements;
m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat;
m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac;
m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements;
m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat;
m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac;
m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements;
m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat;
m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac;
m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements;
m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat;
m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac;
m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements;
m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat;
m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac;
m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements;
m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat;
m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac;
m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements;
m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat;
m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac;
};
void VertexLoader::AppendToString(std::string *dest) {
static const char *posMode[4] = {
"Invalid",
"Direct",
"Idx8",
"Idx16",
};
static const char *posFormats[5] = {
"u8", "s8", "u16", "s16", "flt",
};
dest->append(StringFromFormat("sz: %i skin: %i Pos: %i %s %s Nrm: %i %s %s - %i vtx\n",
m_VertexSize, m_VtxDesc.PosMatIdx, m_VtxAttr.PosElements ? 3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat],
m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat], m_numLoadedVertices));
}

View File

@ -0,0 +1,101 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _VERTEXLOADER_H
#define _VERTEXLOADER_H
#include <string>
#include "CPMemory.h"
#include "DataReader.h"
#include "NativeVertexFormat.h"
#include "x64Emitter.h"
class VertexLoaderUID
{
u32 vid[5];
public:
VertexLoaderUID() {}
void InitFromCurrentState(int vtx_attr_group) {
vid[0] = g_VtxDesc.Hex & 0xFFFFFFFF;
vid[1] = g_VtxDesc.Hex >> 32;
vid[2] = g_VtxAttr[vtx_attr_group].g0.Hex & ~VAT_0_FRACBITS;
vid[3] = g_VtxAttr[vtx_attr_group].g1.Hex & ~VAT_1_FRACBITS;
vid[4] = g_VtxAttr[vtx_attr_group].g2.Hex & ~VAT_2_FRACBITS;
}
bool operator < (const VertexLoaderUID &other) const {
if (vid[0] < other.vid[0])
return true;
else if (vid[0] > other.vid[0])
return false;
for (int i = 1; i < 5; ++i) {
if (vid[i] < other.vid[i])
return true;
else if (vid[i] > other.vid[i])
return false;
}
return false;
}
};
class VertexLoader : public Gen::XCodeBlock
{
public:
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
~VertexLoader();
int GetVertexSize() const {return m_VertexSize;}
void RunVertices(int vtx_attr_group, int primitive, int count);
// For debugging / profiling
void AppendToString(std::string *dest);
private:
enum
{
NRM_ZERO = 0,
NRM_ONE = 1,
NRM_THREE = 3,
};
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
// GC vertex format
TVtxAttr m_VtxAttr; // VAT decoded into easy format
TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided.
// PC vertex format
NativeVertexFormat *m_NativeFmt;
int native_stride;
// Pipeline. To be JIT compiled in the future.
TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower.
int m_numPipelineStages;
const u8 *m_compiledCode;
int m_numLoadedVertices;
void SetVAT(u32 _group0, u32 _group1, u32 _group2);
void CompileVertexTranslator();
void WriteCall(TPipelineFunction);
};
#endif

View File

@ -0,0 +1,232 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _VERTEXLOADERCOLOR_H
#define _VERTEXLOADERCOLOR_H
#include "Common.h"
#include "VideoCommon.h"
#include "LookUpTables.h"
#include "VertexLoader.h"
#include "VertexLoader_Color.h"
#include "NativeVertexWriter.h"
#define RSHIFT 0
#define GSHIFT 8
#define BSHIFT 16
#define ASHIFT 24
extern int colIndex;
extern int colElements[2];
inline void _SetCol(u32 val)
{
*(u32*)VertexManager::s_pCurBufferPointer = val;
VertexManager::s_pCurBufferPointer += 4;
colIndex++;
}
void _SetCol4444(u16 val)
{
u32 col = lut4to8[(val>>0)&0xF]<<ASHIFT;
col |= lut4to8[(val>>12)&0xF] <<RSHIFT;
col |= lut4to8[(val>>8)&0xF] <<GSHIFT;
col |= lut4to8[(val>>4)&0xF] <<BSHIFT;
_SetCol(col);
}
void _SetCol6666(u32 val)
{
u32 col = lut6to8[(val>>18)&0x3F] << RSHIFT;
col |= lut6to8[(val>>12)&0x3F] << GSHIFT;
col |= lut6to8[(val>>6)&0x3F] << BSHIFT;
col |= lut6to8[(val>>0)&0x3F] << ASHIFT;
_SetCol(col);
}
void _SetCol565(u16 val)
{
u32 col = lut5to8[(val>>11)&0x1f] << RSHIFT;
col |= lut6to8[(val>>5 )&0x3f] << GSHIFT;
col |= lut5to8[(val )&0x1f] << BSHIFT;
_SetCol(col | (0xFF<<ASHIFT));
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
inline u32 _Read24(u32 iAddress)
{
u32 col = Memory_Read_U8(iAddress) << RSHIFT; //should just get a pointer to main memory instead of going thru slow memhandler
col |= Memory_Read_U8(iAddress+1) << GSHIFT; //we can guarantee that it is reading from main memory
col |= Memory_Read_U8(iAddress+2) << BSHIFT;
return col | (0xFF<<ASHIFT);
}
inline u32 _Read32(u32 iAddress)
{
u32 col = Memory_Read_U8(iAddress) << RSHIFT; //should just get a pointer to main memory instead of going thru slow memhandler
col |= Memory_Read_U8(iAddress+1) << GSHIFT; //we can guarantee that it is reading from main memory
col |= Memory_Read_U8(iAddress+2) << BSHIFT;
col |= Memory_Read_U8(iAddress+3) << ASHIFT;
return col;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
void LOADERDECL Color_ReadDirect_24b_888()
{
u32 col = DataReadU8()<<RSHIFT;
col |= DataReadU8()<<GSHIFT;
col |= DataReadU8()<<BSHIFT;
_SetCol(col | (0xFF<<ASHIFT));
}
void LOADERDECL Color_ReadDirect_32b_888x(){
u32 col = DataReadU8()<<RSHIFT;
col |= DataReadU8()<<GSHIFT;
col |= DataReadU8()<<BSHIFT;
_SetCol(col | (0xFF<<ASHIFT));
DataReadU8();
}
void LOADERDECL Color_ReadDirect_16b_565()
{
_SetCol565(DataReadU16());
}
void LOADERDECL Color_ReadDirect_16b_4444()
{
_SetCol4444(DataReadU16());
}
void LOADERDECL Color_ReadDirect_24b_6666()
{
u32 val = DataReadU8()<<16;
val|=DataReadU8()<<8;
val|=DataReadU8();
_SetCol6666(val);
}
// F|RES: i am not 100 percent sure, but the colElements seems to be important for rendering only
// at least it fixes mario party 4
//
// if (colElements[colIndex])
// else
// col |= 0xFF<<ASHIFT;
//
void LOADERDECL Color_ReadDirect_32b_8888()
{
// TODO (mb2): check this
u32 col = DataReadU8()<<RSHIFT;
col |= DataReadU8()<<GSHIFT;
col |= DataReadU8()<<BSHIFT;
col |= DataReadU8()<<ASHIFT;
// "kill" the alpha
if (!colElements[colIndex])
col |= 0xFF<<ASHIFT;
_SetCol(col);
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
void LOADERDECL Color_ReadIndex8_16b_565()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
u16 val = Memory_Read_U16(iAddress);
_SetCol565(val);
}
void LOADERDECL Color_ReadIndex8_24b_888()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex8_32b_888x()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR]+colIndex);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex8_16b_4444()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
u16 val = Memory_Read_U16(iAddress);
_SetCol4444(val);
}
void LOADERDECL Color_ReadIndex8_24b_6666()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
u32 val = Memory_Read_U8(iAddress+2) |
(Memory_Read_U8(iAddress+1)<<8) |
(Memory_Read_U8(iAddress)<<16);
_SetCol6666(val);
}
void LOADERDECL Color_ReadIndex8_32b_8888()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
void LOADERDECL Color_ReadIndex16_16b_565()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
u16 val = Memory_Read_U16(iAddress);
_SetCol565(val);
}
void LOADERDECL Color_ReadIndex16_24b_888()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex16_32b_888x()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex16_16b_4444()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
u16 val = Memory_Read_U16(iAddress);
_SetCol4444(val);
}
void LOADERDECL Color_ReadIndex16_24b_6666()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
u32 val = Memory_Read_U8(iAddress+2) |
(Memory_Read_U8(iAddress+1)<<8) |
(Memory_Read_U8(iAddress)<<16);
_SetCol6666(val);
}
void LOADERDECL Color_ReadIndex16_32b_8888()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}
#endif

View File

@ -0,0 +1,42 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _VERTEXLOADERCOLOR_H
#define _VERTEXLOADERCOLOR_H
void LOADERDECL Color_ReadDirect_24b_888();
void LOADERDECL Color_ReadDirect_32b_888x();
void LOADERDECL Color_ReadDirect_16b_565();
void LOADERDECL Color_ReadDirect_16b_4444();
void LOADERDECL Color_ReadDirect_24b_6666();
void LOADERDECL Color_ReadDirect_32b_8888();
void LOADERDECL Color_ReadIndex8_16b_565();
void LOADERDECL Color_ReadIndex8_24b_888();
void LOADERDECL Color_ReadIndex8_32b_888x();
void LOADERDECL Color_ReadIndex8_16b_4444();
void LOADERDECL Color_ReadIndex8_24b_6666();
void LOADERDECL Color_ReadIndex8_32b_8888();
void LOADERDECL Color_ReadIndex16_16b_565();
void LOADERDECL Color_ReadIndex16_24b_888();
void LOADERDECL Color_ReadIndex16_32b_888x();
void LOADERDECL Color_ReadIndex16_16b_4444();
void LOADERDECL Color_ReadIndex16_24b_6666();
void LOADERDECL Color_ReadIndex16_32b_8888();
#endif

View File

@ -0,0 +1,424 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "VideoCommon.h"
#include "VertexLoader.h"
#include "VertexLoader_Normal.h"
#include "NativeVertexWriter.h"
#define LOG_NORM8() PRIM_LOG("norm: %f %f %f, ", ((s8*)VertexManager::s_pCurBufferPointer)[-3]/127.0f, ((s8*)VertexManager::s_pCurBufferPointer)[-2]/127.0f, ((s8*)VertexManager::s_pCurBufferPointer)[-1]/127.0f);
#define LOG_NORM16() PRIM_LOG("norm: %f %f %f, ", ((s16*)VertexManager::s_pCurBufferPointer)[-3]/32767.0f, ((s16*)VertexManager::s_pCurBufferPointer)[-2]/32767.0f, ((s16*)VertexManager::s_pCurBufferPointer)[-1]/32767.0f);
#define LOG_NORMF() PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
void VertexLoader_Normal::Init(void)
{
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index8_Byte3_Indices1); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index8_Short3_Indices1); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index8_Short3_Indices1);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index8_Float3_Indices1);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index8_Byte3_Indices3); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index8_Short3_Indices3); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index8_Short3_Indices3);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index8_Float3_Indices3);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index16_Byte3_Indices1); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index16_Short3_Indices1); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index16_Short3_Indices1);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index16_Float3_Indices1);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index16_Byte3_Indices3); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3);
}
unsigned int VertexLoader_Normal::GetSize(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3)
{
return m_Table[_type][_index3][_elements][_format].gc_size;
}
TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3)
{
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
return pFunc;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////
// --- Direct ---
/////////////////////////////////////////////////////////////////////////////////////////////////////
void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
{
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
VertexManager::s_pCurBufferPointer++;
LOG_NORM8();
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed char)DataReadU8()+0.5f) / 127.5f;
}
void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
{
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16()
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
// ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
// ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
}
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat()
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF()
}
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
{
for (int i = 0; i < 3; i++)
{
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
VertexManager::s_pCurBufferPointer++;
LOG_NORM8();
}
}
void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
{
for (int i = 0; i < 3; i++)
{
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
{
for (int i = 0; i < 3; i++)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////////
// --- Index8 ---
/////////////////////////////////////////////////////////////////////////////////////////////////////
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
VertexManager::s_pCurBufferPointer++;
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed char)Memory_Read_U8(iAddress)+0.5f) / 127.5f;
// ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(signed char)Memory_Read_U8(iAddress+1)+0.5f) / 127.5f;
// ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(signed char)Memory_Read_U8(iAddress+2)+0.5f) / 127.5f;
// VertexManager::s_pCurBufferPointer += 12;
LOG_NORM8();
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
{
u8 Index = DataReadU8();
for (int i = 0; i < 3; i++)
{
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
VertexManager::s_pCurBufferPointer++;
LOG_NORM8();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
{
u8 Index = DataReadU8();
for (int i = 0; i < 3; i++)
{
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
{
u8 Index = DataReadU8();
for (int i = 0; i < 3; i++)
{
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
*VertexManager::s_pCurBufferPointer++;
LOG_NORM8();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////////
// --- Index16 ---
/////////////////////////////////////////////////////////////////////////////////////////////////////
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
VertexManager::s_pCurBufferPointer++;
LOG_NORM8();
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
{
u16 Index = DataReadU16();
for (int i = 0; i < 3; i++)
{
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
VertexManager::s_pCurBufferPointer++;
LOG_NORM8();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
{
u16 Index = DataReadU16();
for (int i = 0; i < 3; i++)
{
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
{
u16 Index = DataReadU16();
for (int i = 0; i < 3; i++)
{
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
VertexManager::s_pCurBufferPointer++;
LOG_NORM8();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
}

View File

@ -0,0 +1,111 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _VERTEXLOADER_NORMAL_H
#define _VERTEXLOADER_NORMAL_H
#include "CommonTypes.h"
class VertexLoader_Normal
{
public:
// Init
static void Init(void);
// GetSize
static unsigned int GetSize(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3);
// GetFunction
static TPipelineFunction GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3);
private:
enum ENormalType
{
NRM_NOT_PRESENT = 0,
NRM_DIRECT = 1,
NRM_INDEX8 = 2,
NRM_INDEX16 = 3,
NUM_NRM_TYPE
};
enum ENormalFormat
{
FORMAT_UBYTE = 0,
FORMAT_BYTE = 1,
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
NUM_NRM_FORMAT
};
enum ENormalElements
{
NRM_NBT = 0,
NRM_NBT3 = 1,
NUM_NRM_ELEMENTS
};
enum ENormalIndices
{
NRM_INDICES1 = 0,
NRM_INDICES3 = 1,
NUM_NRM_INDICES
};
struct Set {
Set() {}
Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {}
int gc_size;
TPipelineFunction function;
// int pc_size;
};
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
// direct
static void LOADERDECL Normal_DirectByte();
static void LOADERDECL Normal_DirectShort();
static void LOADERDECL Normal_DirectFloat();
static void LOADERDECL Normal_DirectByte3();
static void LOADERDECL Normal_DirectShort3();
static void LOADERDECL Normal_DirectFloat3();
// index8
static void LOADERDECL Normal_Index8_Byte();
static void LOADERDECL Normal_Index8_Short();
static void LOADERDECL Normal_Index8_Float();
static void LOADERDECL Normal_Index8_Byte3_Indices1();
static void LOADERDECL Normal_Index8_Short3_Indices1();
static void LOADERDECL Normal_Index8_Float3_Indices1();
static void LOADERDECL Normal_Index8_Byte3_Indices3();
static void LOADERDECL Normal_Index8_Short3_Indices3();
static void LOADERDECL Normal_Index8_Float3_Indices3();
// index16
static void LOADERDECL Normal_Index16_Byte();
static void LOADERDECL Normal_Index16_Short();
static void LOADERDECL Normal_Index16_Float();
static void LOADERDECL Normal_Index16_Byte3_Indices1();
static void LOADERDECL Normal_Index16_Short3_Indices1();
static void LOADERDECL Normal_Index16_Float3_Indices1();
static void LOADERDECL Normal_Index16_Byte3_Indices3();
static void LOADERDECL Normal_Index16_Short3_Indices3();
static void LOADERDECL Normal_Index16_Float3_Indices3();
};
#endif

View File

@ -0,0 +1,242 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef VERTEXLOADER_POSITION_H
#define VERTEXLOADER_POSITION_H
#include "Common.h"
#include "VideoCommon.h"
#include "VertexLoader.h"
#include "VertexLoader_Position.h"
#include "NativeVertexWriter.h"
extern float posScale;
extern TVtxAttr *pVtxAttr;
#define LOG_VTX() PRIM_LOG("vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]);
// Thoughts on the implementation of a vertex loader compiler.
// s_pCurBufferPointer should definitely be in a register.
// Could load the position scale factor in XMM7, for example.
// The pointer inside DataReadU8 in another.
// Let's check out Pos_ReadDirect_UByte(). For Byte, replace MOVZX with MOVSX.
/*
MOVZX(32, R(EAX), MOffset(ESI, 0));
MOVZX(32, R(EBX), MOffset(ESI, 1));
MOVZX(32, R(ECX), MOffset(ESI, 2));
MOVD(XMM0, R(EAX));
MOVD(XMM1, R(EBX));
MOVD(XMM2, R(ECX));
CVTDQ2PS(XMM0, XMM0);
CVTDQ2PS(XMM1, XMM1);
CVTDQ2PS(XMM2, XMM2);
MULSS(XMM0, XMM7);
MULSS(XMM1, XMM7);
MULSS(XMM2, XMM7);
MOVSS(MOffset(EDI, 0), XMM0);
MOVSS(MOffset(EDI, 4), XMM1);
MOVSS(MOffset(EDI, 8), XMM2);
Alternatively, lookup table:
MOVZX(32, R(EAX), MOffset(ESI, 0));
MOVZX(32, R(EBX), MOffset(ESI, 1));
MOVZX(32, R(ECX), MOffset(ESI, 2));
MOV(32, R(EAX), MComplex(LUTREG, EAX, 4));
MOV(32, R(EBX), MComplex(LUTREG, EBX, 4));
MOV(32, R(ECX), MComplex(LUTREG, ECX, 4));
MOV(MOffset(EDI, 0), XMM0);
MOV(MOffset(EDI, 4), XMM1);
MOV(MOffset(EDI, 8), XMM2);
SSE4:
PINSRB(XMM0, MOffset(ESI, 0), 0);
PINSRB(XMM0, MOffset(ESI, 1), 4);
PINSRB(XMM0, MOffset(ESI, 2), 8);
CVTDQ2PS(XMM0, XMM0);
<two unpacks here to sign extend>
MULPS(XMM0, XMM7);
MOVUPS(MOffset(EDI, 0), XMM0);
*/
// ==============================================================================
// Direct
// ==============================================================================
void LOADERDECL Pos_ReadDirect_UByte()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * posScale;
if (pVtxAttr->PosElements)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)DataReadU8() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_Byte()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * posScale;
if (pVtxAttr->PosElements)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s8)DataReadU8() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_UShort()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * posScale;
if (pVtxAttr->PosElements)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)DataReadU16() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_Short()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * posScale;
if (pVtxAttr->PosElements)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s16)DataReadU16() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_Float()
{
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
if (pVtxAttr->PosElements)
((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
#define Pos_ReadIndex_Byte(T) { \
u32 iAddress = arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); \
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Memory_Read_U8(iAddress)) * posScale; \
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Memory_Read_U8(iAddress+1)) * posScale; \
if (pVtxAttr->PosElements) \
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Memory_Read_U8(iAddress+2)) * posScale; \
else \
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; \
LOG_VTX(); \
VertexManager::s_pCurBufferPointer += 12; \
}
#define Pos_ReadIndex_Short(T) { \
u32 iAddress = arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); \
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Memory_Read_U16(iAddress)) * posScale; \
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Memory_Read_U16(iAddress+2)) * posScale; \
if (pVtxAttr->PosElements) \
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Memory_Read_U16(iAddress+4)) * posScale; \
else \
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; \
LOG_VTX(); \
VertexManager::s_pCurBufferPointer += 12; \
}
#define Pos_ReadIndex_Float() { \
u32 iAddress = arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]); \
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); \
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); \
if (pVtxAttr->PosElements) \
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); \
else \
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; \
LOG_VTX(); \
VertexManager::s_pCurBufferPointer += 12; \
}
// ==============================================================================
// Index 8
// ==============================================================================
void LOADERDECL Pos_ReadIndex8_UByte()
{
u8 Index = DataReadU8();
Pos_ReadIndex_Byte(u8);
}
void LOADERDECL Pos_ReadIndex8_Byte()
{
u8 Index = DataReadU8();
Pos_ReadIndex_Byte(s8);
}
void LOADERDECL Pos_ReadIndex8_UShort()
{
u8 Index = DataReadU8();
Pos_ReadIndex_Short(u16);
}
void LOADERDECL Pos_ReadIndex8_Short()
{
u8 Index = DataReadU8();
Pos_ReadIndex_Short(s16);
}
void LOADERDECL Pos_ReadIndex8_Float()
{
u8 Index = DataReadU8();
Pos_ReadIndex_Float();
}
// ==============================================================================
// Index 16
// ==============================================================================
void LOADERDECL Pos_ReadIndex16_UByte(){
u16 Index = DataReadU16();
Pos_ReadIndex_Byte(u8);
}
void LOADERDECL Pos_ReadIndex16_Byte(){
u16 Index = DataReadU16();
Pos_ReadIndex_Byte(s8);
}
void LOADERDECL Pos_ReadIndex16_UShort(){
u16 Index = DataReadU16();
Pos_ReadIndex_Short(u16);
}
void LOADERDECL Pos_ReadIndex16_Short()
{
u16 Index = DataReadU16();
Pos_ReadIndex_Short(s16);
}
void LOADERDECL Pos_ReadIndex16_Float()
{
u16 Index = DataReadU16();
Pos_ReadIndex_Float();
}
#endif

View File

@ -0,0 +1,39 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef VERTEXLOADER_POSITION_H
#define VERTEXLOADER_POSITION_H
void LOADERDECL Pos_ReadDirect_UByte();
void LOADERDECL Pos_ReadDirect_Byte();
void LOADERDECL Pos_ReadDirect_UShort();
void LOADERDECL Pos_ReadDirect_Short();
void LOADERDECL Pos_ReadDirect_Float();
void LOADERDECL Pos_ReadIndex8_UByte();
void LOADERDECL Pos_ReadIndex8_Byte();
void LOADERDECL Pos_ReadIndex8_UShort();
void LOADERDECL Pos_ReadIndex8_Short();
void LOADERDECL Pos_ReadIndex8_Float();
void LOADERDECL Pos_ReadIndex16_UByte();
void LOADERDECL Pos_ReadIndex16_Byte();
void LOADERDECL Pos_ReadIndex16_UShort();
void LOADERDECL Pos_ReadIndex16_Short();
void LOADERDECL Pos_ReadIndex16_Float();
#endif

View File

@ -0,0 +1,338 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef VERTEXLOADER_TEXCOORD_H
#define VERTEXLOADER_TEXCOORD_H
#include "Common.h"
#include "VideoCommon.h"
#include "VertexLoader.h"
#include "VertexLoader_Position.h"
#include "NativeVertexWriter.h"
#define LOG_TEX1() PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]);
#define LOG_TEX2() PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]);
extern int tcIndex;
extern float tcScale[8];
void LOADERDECL TexCoord_Read_Dummy()
{
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UByte1()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UByte2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Byte1()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Byte2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UShort1()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UShort2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Short1()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Short2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Float1()
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Float2()
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
// ==================================================================================
void LOADERDECL TexCoord_ReadIndex8_UByte1()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UByte2()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Byte1()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Byte2()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UShort1()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UShort2()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Short1()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Short2()
{
u8 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Float1()
{
u16 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Float2()
{
u16 Index = DataReadU8();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
// ==================================================================================
void LOADERDECL TexCoord_ReadIndex16_UByte1()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UByte2()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Byte1()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Byte2()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UShort1()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UShort2()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Short1()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Short2()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Float1()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Float2()
{
u16 Index = DataReadU16();
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress + 4);
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
#endif

View File

@ -0,0 +1,53 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef VERTEXLOADER_TEXCOORD_H
#define VERTEXLOADER_TEXCOORD_H
void LOADERDECL TexCoord_Read_Dummy();
void LOADERDECL TexCoord_ReadDirect_UByte1();
void LOADERDECL TexCoord_ReadDirect_UByte2();
void LOADERDECL TexCoord_ReadDirect_Byte1();
void LOADERDECL TexCoord_ReadDirect_Byte2();
void LOADERDECL TexCoord_ReadDirect_UShort1();
void LOADERDECL TexCoord_ReadDirect_UShort2();
void LOADERDECL TexCoord_ReadDirect_Short1();
void LOADERDECL TexCoord_ReadDirect_Short2();
void LOADERDECL TexCoord_ReadDirect_Float1();
void LOADERDECL TexCoord_ReadDirect_Float2();
void LOADERDECL TexCoord_ReadIndex8_UByte1();
void LOADERDECL TexCoord_ReadIndex8_UByte2();
void LOADERDECL TexCoord_ReadIndex8_Byte1();
void LOADERDECL TexCoord_ReadIndex8_Byte2();
void LOADERDECL TexCoord_ReadIndex8_UShort1();
void LOADERDECL TexCoord_ReadIndex8_UShort2();
void LOADERDECL TexCoord_ReadIndex8_Short1();
void LOADERDECL TexCoord_ReadIndex8_Short2();
void LOADERDECL TexCoord_ReadIndex8_Float1();
void LOADERDECL TexCoord_ReadIndex8_Float2();
void LOADERDECL TexCoord_ReadIndex16_UByte1();
void LOADERDECL TexCoord_ReadIndex16_UByte2();
void LOADERDECL TexCoord_ReadIndex16_Byte1();
void LOADERDECL TexCoord_ReadIndex16_Byte2();
void LOADERDECL TexCoord_ReadIndex16_UShort1();
void LOADERDECL TexCoord_ReadIndex16_UShort2();
void LOADERDECL TexCoord_ReadIndex16_Short1();
void LOADERDECL TexCoord_ReadIndex16_Short2();
void LOADERDECL TexCoord_ReadIndex16_Float1();
void LOADERDECL TexCoord_ReadIndex16_Float2();
#endif

View File

@ -21,6 +21,16 @@
#include "Common.h"
#include "pluginspecs_video.h"
#if defined(_MSC_VER) && !defined(__x86_64__) && !defined(_M_X64)
void * memcpy_amd(void *dest, const void *src, size_t n);
unsigned char memcmp_mmx(const void* src1, const void* src2, int cmpsize);
#define memcpy_gc memcpy_amd
#define memcmp_gc memcmp_mmx
#else
#define memcpy_gc memcpy
#define memcmp_gc memcmp
#endif
enum {
EFB_WIDTH = 640,
EFB_HEIGHT = 528,

View File

@ -0,0 +1,473 @@
/******************************************************************************
Copyright (c) 2001 Advanced Micro Devices, Inc.
LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
NOT APPLY TO YOU.
AMD does not assume any responsibility for any errors which may appear in the
Materials nor any responsibility to support or update the Materials. AMD retains
the right to make changes to its test specifications at any time, without notice.
NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
further information, software, technical information, know-how, or show-how
available to you.
So that all may benefit from your experience, please report any problems
or suggestions about this software to 3dsdk.support@amd.com
AMD Developer Technologies, M/S 585
Advanced Micro Devices, Inc.
5900 E. Ben White Blvd.
Austin, TX 78741
3dsdk.support@amd.com
******************************************************************************/
#include <assert.h>
/*****************************************************************************
MEMCPY_AMD.CPP
******************************************************************************/
// Very optimized memcpy() routine for AMD Athlon and Duron family.
// This code uses any of FOUR different basic copy methods, depending
// on the transfer size.
// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
// "Streaming Store"), and also uses the software prefetch instructions,
// be sure you're running on Athlon/Duron or other recent CPU before calling!
#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop".
#define IN_CACHE_COPY 2 * 1024 // upper limit for movq/movq copy w/SW prefetch
// Next is a copy that uses the MMX registers to copy 8 bytes at a time,
// also using the "unrolled loop" optimization. This code uses
// the software prefetch instruction to get the data into the cache.
#define UNCACHED_COPY 4 * 1024 // upper limit for movq/movntq w/SW prefetch
// For larger blocks, which will spill beyond the cache, it's faster to
// use the Streaming Store instruction MOVNTQ. This write instruction
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
// USE 64 * 1024 FOR THIS VALUE IF YOU'RE ALWAYS FILLING A "CLEAN CACHE"
#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.
//#include <stddef.h>
// Inline assembly syntax for use with Visual C++
#ifdef _WIN32
#include <windows.h>
#endif
#if defined(_MSC_VER) && !defined(__x86_64__) && !defined(_M_X64)
void * memcpy_amd(void *dest, const void *src, size_t n)
{
__asm {
mov ecx, [n] ; number of bytes to copy
mov edi, [dest] ; destination
mov esi, [src] ; source
mov ebx, ecx ; keep a copy of count
cld
cmp ecx, TINY_BLOCK_COPY
jb $memcpy_ic_3 ; tiny? skip mmx copy
cmp ecx, 32*1024 ; do not align between 32k-64k because
jbe $memcpy_do_align ; it appears to be slower
cmp ecx, 64*1024
jbe $memcpy_align_done
$memcpy_do_align:
mov ecx, 8 ; a trick that is faster than rep movsb...
sub ecx, edi ; align destination to qword
and ecx, 111b ; get the low bits
sub ebx, ecx ; update copy count
neg ecx ; set up to jump into the array
add ecx, offset $memcpy_align_done
jmp ecx ; jump to array of movsb''s
align 4
movsb
movsb
movsb
movsb
movsb
movsb
movsb
movsb
$memcpy_align_done: ; destination is dword aligned
mov ecx, ebx ; number of bytes left to copy
shr ecx, 6 ; get 64-byte block count
jz $memcpy_ic_2 ; finish the last few bytes
cmp ecx, IN_CACHE_COPY/64 ; too big 4 cache? use uncached copy
jae $memcpy_uc_test
// This is small block copy that uses the MMX registers to copy 8 bytes
// at a time. It uses the "unrolled loop" optimization, and also uses
// the software prefetch instruction to get the data into the cache.
align 16
$memcpy_ic_1: ; 64-byte block copies, in-cache copy
prefetchnta [esi + (200*64/34+192)] ; start reading ahead
movq mm0, [esi+0] ; read 64 bits
movq mm1, [esi+8]
movq [edi+0], mm0 ; write 64 bits
movq [edi+8], mm1 ; note: the normal movq writes the
movq mm2, [esi+16] ; data to cache; a cache line will be
movq mm3, [esi+24] ; allocated as needed, to store the data
movq [edi+16], mm2
movq [edi+24], mm3
movq mm0, [esi+32]
movq mm1, [esi+40]
movq [edi+32], mm0
movq [edi+40], mm1
movq mm2, [esi+48]
movq mm3, [esi+56]
movq [edi+48], mm2
movq [edi+56], mm3
add esi, 64 ; update source pointer
add edi, 64 ; update destination pointer
dec ecx ; count down
jnz $memcpy_ic_1 ; last 64-byte block?
$memcpy_ic_2:
mov ecx, ebx ; has valid low 6 bits of the byte count
$memcpy_ic_3:
shr ecx, 2 ; dword count
and ecx, 1111b ; only look at the "remainder" bits
neg ecx ; set up to jump into the array
add ecx, offset $memcpy_last_few
jmp ecx ; jump to array of movsd''s
$memcpy_uc_test:
cmp ecx, UNCACHED_COPY/64 ; big enough? use block prefetch copy
jae $memcpy_bp_1
$memcpy_64_test:
or ecx, ecx ; tail end of block prefetch will jump here
jz $memcpy_ic_2 ; no more 64-byte blocks left
// For larger blocks, which will spill beyond the cache, it's faster to
// use the Streaming Store instruction MOVNTQ. This write instruction
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
align 16
$memcpy_uc_1: ; 64-byte blocks, uncached copy
prefetchnta [esi + (200*64/34+192)] ; start reading ahead
movq mm0,[esi+0] ; read 64 bits
add edi,64 ; update destination pointer
movq mm1,[esi+8]
add esi,64 ; update source pointer
movq mm2,[esi-48]
movntq [edi-64], mm0 ; write 64 bits, bypassing the cache
movq mm0,[esi-40] ; note: movntq also prevents the CPU
movntq [edi-56], mm1 ; from READING the destination address
movq mm1,[esi-32] ; into the cache, only to be over-written
movntq [edi-48], mm2 ; so that also helps performance
movq mm2,[esi-24]
movntq [edi-40], mm0
movq mm0,[esi-16]
movntq [edi-32], mm1
movq mm1,[esi-8]
movntq [edi-24], mm2
movntq [edi-16], mm0
dec ecx
movntq [edi-8], mm1
jnz $memcpy_uc_1 ; last 64-byte block?
jmp $memcpy_ic_2 ; almost done
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.
$memcpy_bp_1: ; large blocks, block prefetch copy
cmp ecx, CACHEBLOCK ; big enough to run another prefetch loop?
jl $memcpy_64_test ; no, back to regular uncached copy
mov eax, CACHEBLOCK / 2 ; block prefetch loop, unrolled 2X
add esi, CACHEBLOCK * 64 ; move to the top of the block
align 16
$memcpy_bp_2:
mov edx, [esi-64] ; grab one address per cache line
mov edx, [esi-128] ; grab one address per cache line
sub esi, 128 ; go reverse order to suppress HW prefetcher
dec eax ; count down the cache lines
jnz $memcpy_bp_2 ; keep grabbing more lines into cache
mov eax, CACHEBLOCK ; now that it is in cache, do the copy
align 16
$memcpy_bp_3:
movq mm0, [esi ] ; read 64 bits
movq mm1, [esi+ 8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
add esi, 64 ; update source pointer
movntq [edi ], mm0 ; write 64 bits, bypassing cache
movntq [edi+ 8], mm1 ; note: movntq also prevents the CPU
movntq [edi+16], mm2 ; from READING the destination address
movntq [edi+24], mm3 ; into the cache, only to be over-written,
movntq [edi+32], mm4 ; so that also helps performance
movntq [edi+40], mm5
movntq [edi+48], mm6
movntq [edi+56], mm7
add edi, 64 ; update dest pointer
dec eax ; count down
jnz $memcpy_bp_3 ; keep copying
sub ecx, CACHEBLOCK ; update the 64-byte block count
jmp $memcpy_bp_1 ; keep processing chunks
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop". Then it handles the last few bytes.
align 4
movsd
movsd ; perform last 1-15 dword copies
movsd
movsd
movsd
movsd
movsd
movsd
movsd
movsd ; perform last 1-7 dword copies
movsd
movsd
movsd
movsd
movsd
movsd
$memcpy_last_few: ; dword aligned from before movsd''s
mov ecx, ebx ; has valid low 2 bits of the byte count
and ecx, 11b ; the last few cows must come home
jz $memcpy_final ; no more, lets leave
rep movsb ; the last 1, 2, or 3 bytes
$memcpy_final:
emms ; clean up the MMX state
sfence ; flush the write buffer
mov eax, [dest] ; ret value = destination pointer
}
}
// mmx memcmp implementation, size has to be a multiple of 8
// returns 0 is equal, nonzero value if not equal
// ~10 times faster than standard memcmp
// (zerofrog)
unsigned char memcmp_mmx(const void* src1, const void* src2, int cmpsize)
{
assert( (cmpsize&7) == 0 );
__asm {
push esi
mov ecx, cmpsize
mov edx, src1
mov esi, src2
cmp ecx, 32
jl Done4
// custom test first 8 to make sure things are ok
movq mm0, [esi]
movq mm1, [esi+8]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pand mm0, mm1
movq mm2, [esi+16]
pmovmskb eax, mm0
movq mm3, [esi+24]
// check if eq
cmp eax, 0xff
je NextComp
mov eax, 1
jmp End
NextComp:
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm2, mm3
pmovmskb eax, mm2
sub ecx, 32
add esi, 32
add edx, 32
// check if eq
cmp eax, 0xff
je ContinueTest
mov eax, 1
jmp End
cmp ecx, 64
jl Done8
Cmp8:
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm0, mm1
pcmpeqd mm4, [edx+32]
pand mm0, mm2
pcmpeqd mm5, [edx+40]
pand mm0, mm3
pcmpeqd mm6, [edx+48]
pand mm0, mm4
pcmpeqd mm7, [edx+56]
pand mm0, mm5
pand mm0, mm6
pand mm0, mm7
pmovmskb eax, mm0
// check if eq
cmp eax, 0xff
je Continue
mov eax, 1
jmp End
Continue:
sub ecx, 64
add esi, 64
add edx, 64
ContinueTest:
cmp ecx, 64
jge Cmp8
Done8:
test ecx, 0x20
jz Done4
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm0, mm1
pand mm0, mm2
pand mm0, mm3
pmovmskb eax, mm0
sub ecx, 32
add esi, 32
add edx, 32
// check if eq
cmp eax, 0xff
je Done4
mov eax, 1
jmp End
Done4:
cmp ecx, 24
jne Done2
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pand mm0, mm1
pand mm0, mm2
pmovmskb eax, mm0
// check if eq
cmp eax, 0xff
setne al
jmp End
Done2:
cmp ecx, 16
jne Done1
movq mm0, [esi]
movq mm1, [esi+8]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pand mm0, mm1
pmovmskb eax, mm0
// check if eq
cmp eax, 0xff
setne al
jmp End
Done1:
cmp ecx, 8
jne Done
mov eax, [esi]
mov esi, [esi+4]
cmp eax, [edx]
je Next
mov eax, 1
jmp End
Next:
cmp esi, [edx+4]
setne al
jmp End
Done:
xor eax, eax
End:
pop esi
emms
}
}
#else // _MSC_VER
// assume gcc or mingw or win x64
#include <memory.h>
#include <string.h>
void * memcpy_amd(void *dest, const void *src, size_t n)
{
memcpy(dest, src, n);
return dest;
}
#endif