mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-24 14:49:42 -06:00
GL Vertex loader moved to VideoCommon.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1692 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
@ -732,54 +732,6 @@
|
||||
RelativePath=".\Src\NativeVertexFormat.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AssemblerOutput="4"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_Color.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_Color.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_Normal.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_Normal.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_Position.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_Position.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_TextCoord.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoader_TextCoord.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\VertexLoaderManager.cpp"
|
||||
>
|
||||
@ -993,60 +945,6 @@
|
||||
RelativePath=".\Src\main.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\memcpy_amd.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
UsePrecompiledHeader="0"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
UsePrecompiledHeader="0"
|
||||
PrecompiledHeaderThrough=""
|
||||
PrecompiledHeaderFile=""
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
UsePrecompiledHeader="0"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
UsePrecompiledHeader="0"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="DebugFast|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
UsePrecompiledHeader="0"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="DebugFast|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
UsePrecompiledHeader="0"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\Render.cpp"
|
||||
>
|
||||
|
@ -25,16 +25,6 @@
|
||||
#include "Common.h"
|
||||
#include "Config.h"
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__x86_64__) && !defined(_M_X64)
|
||||
void * memcpy_amd(void *dest, const void *src, size_t n);
|
||||
unsigned char memcmp_mmx(const void* src1, const void* src2, int cmpsize);
|
||||
#define memcpy_gc memcpy_amd
|
||||
#define memcmp_gc memcmp_mmx
|
||||
#else
|
||||
#define memcpy_gc memcpy
|
||||
#define memcmp_gc memcmp
|
||||
#endif
|
||||
|
||||
#include "VideoCommon.h"
|
||||
|
||||
#endif
|
||||
|
@ -1,684 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "Globals.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "Common.h"
|
||||
#include "Config.h"
|
||||
#include "Profiler.h"
|
||||
#include "MemoryUtil.h"
|
||||
#include "StringUtil.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
|
||||
#include "LookUpTables.h"
|
||||
#include "Statistics.h"
|
||||
#include "VertexLoaderManager.h"
|
||||
#include "VertexLoader.h"
|
||||
#include "BPMemory.h"
|
||||
#include "DataReader.h"
|
||||
#include "NativeVertexWriter.h"
|
||||
|
||||
#include "VertexLoader_Position.h"
|
||||
#include "VertexLoader_Normal.h"
|
||||
#include "VertexLoader_Color.h"
|
||||
#include "VertexLoader_TextCoord.h"
|
||||
|
||||
#define USE_JIT
|
||||
|
||||
#define COMPILED_CODE_SIZE 4096
|
||||
|
||||
NativeVertexFormat *g_nativeVertexFmt;
|
||||
|
||||
#ifndef _WIN32
|
||||
#undef inline
|
||||
#define inline
|
||||
#endif
|
||||
|
||||
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
|
||||
// when decoding each vertex.
|
||||
static u8 s_curposmtx;
|
||||
static u8 s_curtexmtx[8];
|
||||
static int s_texmtxwrite = 0;
|
||||
static int s_texmtxread = 0;
|
||||
|
||||
static int loop_counter;
|
||||
|
||||
// Vertex loaders read these. Although the scale ones should be baked into the shader.
|
||||
int tcIndex;
|
||||
int colIndex;
|
||||
TVtxAttr* pVtxAttr;
|
||||
int colElements[2];
|
||||
float posScale;
|
||||
float tcScale[8];
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
void LOADERDECL PosMtx_ReadDirect_UByte()
|
||||
{
|
||||
s_curposmtx = DataReadU8() & 0x3f;
|
||||
PRIM_LOG("posmtx: %d, ", s_curposmtx);
|
||||
}
|
||||
|
||||
void LOADERDECL PosMtx_Write()
|
||||
{
|
||||
*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
|
||||
*VertexManager::s_pCurBufferPointer++ = 0;
|
||||
*VertexManager::s_pCurBufferPointer++ = 0;
|
||||
*VertexManager::s_pCurBufferPointer++ = 0;
|
||||
}
|
||||
|
||||
void LOADERDECL TexMtx_ReadDirect_UByte()
|
||||
{
|
||||
s_curtexmtx[s_texmtxread] = DataReadU8()&0x3f;
|
||||
PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]);
|
||||
s_texmtxread++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexMtx_Write_Float()
|
||||
{
|
||||
*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
}
|
||||
|
||||
void LOADERDECL TexMtx_Write_Float2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
}
|
||||
|
||||
void LOADERDECL TexMtx_Write_Short3()
|
||||
{
|
||||
((s16*)VertexManager::s_pCurBufferPointer)[0] = 0;
|
||||
((s16*)VertexManager::s_pCurBufferPointer)[1] = 0;
|
||||
((s16*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
}
|
||||
|
||||
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
||||
{
|
||||
m_compiledCode = NULL;
|
||||
m_numLoadedVertices = 0;
|
||||
m_VertexSize = 0;
|
||||
m_numPipelineStages = 0;
|
||||
m_NativeFmt = NativeVertexFormat::Create();
|
||||
loop_counter = 0;
|
||||
VertexLoader_Normal::Init();
|
||||
|
||||
m_VtxDesc = vtx_desc;
|
||||
SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
|
||||
|
||||
AllocCodeSpace(COMPILED_CODE_SIZE);
|
||||
CompileVertexTranslator();
|
||||
WriteProtect();
|
||||
}
|
||||
|
||||
VertexLoader::~VertexLoader()
|
||||
{
|
||||
FreeCodeSpace();
|
||||
delete m_NativeFmt;
|
||||
}
|
||||
|
||||
void VertexLoader::CompileVertexTranslator()
|
||||
{
|
||||
m_VertexSize = 0;
|
||||
const TVtxAttr &vtx_attr = m_VtxAttr;
|
||||
|
||||
#ifdef USE_JIT
|
||||
if (m_compiledCode)
|
||||
PanicAlert("trying to recompile a vtx translator");
|
||||
|
||||
m_compiledCode = GetCodePtr();
|
||||
ABI_EmitPrologue(4);
|
||||
|
||||
// Start loop here
|
||||
const u8 *loop_start = GetCodePtr();
|
||||
|
||||
// Reset component counters if present in vertex format only.
|
||||
if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord ||
|
||||
m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) {
|
||||
MOV(32, M(&tcIndex), Imm32(0));
|
||||
}
|
||||
if (m_VtxDesc.Color0 || m_VtxDesc.Color1) {
|
||||
MOV(32, M(&colIndex), Imm32(0));
|
||||
}
|
||||
if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx ||
|
||||
m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) {
|
||||
MOV(32, M(&s_texmtxwrite), Imm32(0));
|
||||
MOV(32, M(&s_texmtxread), Imm32(0));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Colors
|
||||
const int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
|
||||
// TextureCoord
|
||||
// Since m_VtxDesc.Text7Coord is broken across a 32 bit word boundary, retrieve its value manually.
|
||||
// If we didn't do this, the vertex format would be read as one bit offset from where it should be, making
|
||||
// 01 become 00, and 10/11 become 01
|
||||
const int tc[8] = {
|
||||
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
|
||||
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, (m_VtxDesc.Hex >> 31) & 3
|
||||
};
|
||||
|
||||
// Reset pipeline
|
||||
m_numPipelineStages = 0;
|
||||
|
||||
// It's a bit ugly that we poke inside m_NativeFmt in this function. Planning to fix this.
|
||||
m_NativeFmt->m_components = 0;
|
||||
|
||||
// Position in pc vertex format.
|
||||
int nat_offset = 0;
|
||||
PortableVertexDeclaration vtx_decl;
|
||||
memset(&vtx_decl, 0, sizeof(vtx_decl));
|
||||
for (int i = 0; i < 8; i++) {
|
||||
vtx_decl.texcoord_offset[i] = -1;
|
||||
}
|
||||
|
||||
// m_VBVertexStride for texmtx and posmtx is computed later when writing.
|
||||
|
||||
// Position Matrix Index
|
||||
if (m_VtxDesc.PosMatIdx) {
|
||||
WriteCall(PosMtx_ReadDirect_UByte);
|
||||
m_NativeFmt->m_components |= VB_HAS_POSMTXIDX;
|
||||
m_VertexSize += 1;
|
||||
}
|
||||
|
||||
if (m_VtxDesc.Tex0MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX0; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
if (m_VtxDesc.Tex1MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX1; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
if (m_VtxDesc.Tex2MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX2; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
if (m_VtxDesc.Tex3MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX3; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
if (m_VtxDesc.Tex4MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX4; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
if (m_VtxDesc.Tex5MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX5; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
if (m_VtxDesc.Tex6MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX6; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
|
||||
switch (m_VtxDesc.Position) {
|
||||
case NOT_PRESENT: {_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break;
|
||||
case DIRECT:
|
||||
switch (m_VtxAttr.PosFormat) {
|
||||
case FORMAT_UBYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_UByte); break;
|
||||
case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_Byte); break;
|
||||
case FORMAT_USHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_UShort); break;
|
||||
case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_Short); break;
|
||||
case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; WriteCall(Pos_ReadDirect_Float); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
nat_offset += 12;
|
||||
break;
|
||||
case INDEX8:
|
||||
switch (m_VtxAttr.PosFormat) {
|
||||
case FORMAT_UBYTE: WriteCall(Pos_ReadIndex8_UByte); break; //WTF?
|
||||
case FORMAT_BYTE: WriteCall(Pos_ReadIndex8_Byte); break;
|
||||
case FORMAT_USHORT: WriteCall(Pos_ReadIndex8_UShort); break;
|
||||
case FORMAT_SHORT: WriteCall(Pos_ReadIndex8_Short); break;
|
||||
case FORMAT_FLOAT: WriteCall(Pos_ReadIndex8_Float); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
m_VertexSize += 1;
|
||||
nat_offset += 12;
|
||||
break;
|
||||
case INDEX16:
|
||||
switch (m_VtxAttr.PosFormat) {
|
||||
case FORMAT_UBYTE: WriteCall(Pos_ReadIndex16_UByte); break;
|
||||
case FORMAT_BYTE: WriteCall(Pos_ReadIndex16_Byte); break;
|
||||
case FORMAT_USHORT: WriteCall(Pos_ReadIndex16_UShort); break;
|
||||
case FORMAT_SHORT: WriteCall(Pos_ReadIndex16_Short); break;
|
||||
case FORMAT_FLOAT: WriteCall(Pos_ReadIndex16_Float); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
m_VertexSize += 2;
|
||||
nat_offset += 12;
|
||||
break;
|
||||
}
|
||||
|
||||
// Normals
|
||||
vtx_decl.num_normals = 0;
|
||||
if (m_VtxDesc.Normal != NOT_PRESENT) {
|
||||
m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
|
||||
TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
|
||||
if (pFunc == 0)
|
||||
{
|
||||
char temp[256];
|
||||
sprintf(temp,"%i %i %i %i", m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
|
||||
g_VideoInitialize.pSysMessage("VertexLoader_Normal::GetFunction returned zero!");
|
||||
}
|
||||
WriteCall(pFunc);
|
||||
|
||||
vtx_decl.num_normals = vtx_attr.NormalElements ? 3 : 1;
|
||||
switch (vtx_attr.NormalFormat) {
|
||||
case FORMAT_UBYTE:
|
||||
case FORMAT_BYTE:
|
||||
vtx_decl.normal_gl_type = VAR_BYTE;
|
||||
vtx_decl.normal_gl_size = 4;
|
||||
vtx_decl.normal_offset[0] = nat_offset;
|
||||
nat_offset += 4;
|
||||
if (vtx_attr.NormalElements) {
|
||||
vtx_decl.normal_offset[1] = nat_offset;
|
||||
nat_offset += 4;
|
||||
vtx_decl.normal_offset[2] = nat_offset;
|
||||
nat_offset += 4;
|
||||
}
|
||||
break;
|
||||
case FORMAT_USHORT:
|
||||
case FORMAT_SHORT:
|
||||
vtx_decl.normal_gl_type = VAR_SHORT;
|
||||
vtx_decl.normal_gl_size = 4;
|
||||
vtx_decl.normal_offset[0] = nat_offset;
|
||||
nat_offset += 8;
|
||||
if (vtx_attr.NormalElements) {
|
||||
vtx_decl.normal_offset[1] = nat_offset;
|
||||
nat_offset += 8;
|
||||
vtx_decl.normal_offset[2] = nat_offset;
|
||||
nat_offset += 8;
|
||||
}
|
||||
break;
|
||||
case FORMAT_FLOAT:
|
||||
vtx_decl.normal_gl_type = VAR_FLOAT;
|
||||
vtx_decl.normal_gl_size = 3;
|
||||
vtx_decl.normal_offset[0] = nat_offset;
|
||||
nat_offset += 12;
|
||||
if (vtx_attr.NormalElements) {
|
||||
vtx_decl.normal_offset[1] = nat_offset;
|
||||
nat_offset += 12;
|
||||
vtx_decl.normal_offset[2] = nat_offset;
|
||||
nat_offset += 12;
|
||||
}
|
||||
break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
|
||||
int numNormals = (m_VtxAttr.NormalElements == 1) ? NRM_THREE : NRM_ONE;
|
||||
m_NativeFmt->m_components |= VB_HAS_NRM0;
|
||||
|
||||
if (numNormals == NRM_THREE)
|
||||
m_NativeFmt->m_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
|
||||
}
|
||||
|
||||
vtx_decl.color_gl_type = VAR_UNSIGNED_BYTE;
|
||||
for (int i = 0; i < 2; i++) {
|
||||
m_NativeFmt->m_components |= VB_HAS_COL0 << i;
|
||||
switch (col[i])
|
||||
{
|
||||
case NOT_PRESENT:
|
||||
m_NativeFmt->m_components &= ~(VB_HAS_COL0 << i);
|
||||
vtx_decl.color_offset[i] = -1;
|
||||
break;
|
||||
case DIRECT:
|
||||
switch (m_VtxAttr.color[i].Comp)
|
||||
{
|
||||
case FORMAT_16B_565: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_565); break;
|
||||
case FORMAT_24B_888: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_888); break;
|
||||
case FORMAT_32B_888x: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_888x); break;
|
||||
case FORMAT_16B_4444: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_4444); break;
|
||||
case FORMAT_24B_6666: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_6666); break;
|
||||
case FORMAT_32B_8888: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_8888); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
break;
|
||||
case INDEX8:
|
||||
m_VertexSize += 1;
|
||||
switch (m_VtxAttr.color[i].Comp)
|
||||
{
|
||||
case FORMAT_16B_565: WriteCall(Color_ReadIndex8_16b_565); break;
|
||||
case FORMAT_24B_888: WriteCall(Color_ReadIndex8_24b_888); break;
|
||||
case FORMAT_32B_888x: WriteCall(Color_ReadIndex8_32b_888x); break;
|
||||
case FORMAT_16B_4444: WriteCall(Color_ReadIndex8_16b_4444); break;
|
||||
case FORMAT_24B_6666: WriteCall(Color_ReadIndex8_24b_6666); break;
|
||||
case FORMAT_32B_8888: WriteCall(Color_ReadIndex8_32b_8888); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
break;
|
||||
case INDEX16:
|
||||
m_VertexSize += 2;
|
||||
switch (m_VtxAttr.color[i].Comp)
|
||||
{
|
||||
case FORMAT_16B_565: WriteCall(Color_ReadIndex16_16b_565); break;
|
||||
case FORMAT_24B_888: WriteCall(Color_ReadIndex16_24b_888); break;
|
||||
case FORMAT_32B_888x: WriteCall(Color_ReadIndex16_32b_888x); break;
|
||||
case FORMAT_16B_4444: WriteCall(Color_ReadIndex16_16b_4444); break;
|
||||
case FORMAT_24B_6666: WriteCall(Color_ReadIndex16_24b_6666); break;
|
||||
case FORMAT_32B_8888: WriteCall(Color_ReadIndex16_32b_8888); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Common for the three bottom cases
|
||||
if (col[i] != NOT_PRESENT) {
|
||||
vtx_decl.color_offset[i] = nat_offset;
|
||||
nat_offset += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
|
||||
for (int i = 0; i < 8; i++) {
|
||||
m_NativeFmt->m_components |= VB_HAS_UV0 << i;
|
||||
int elements = m_VtxAttr.texCoord[i].Elements;
|
||||
switch (tc[i])
|
||||
{
|
||||
case NOT_PRESENT:
|
||||
m_NativeFmt->m_components &= ~(VB_HAS_UV0 << i);
|
||||
break;
|
||||
case DIRECT:
|
||||
switch (m_VtxAttr.texCoord[i].Format)
|
||||
{
|
||||
case FORMAT_UBYTE: m_VertexSize += elements?2:1; WriteCall(elements?TexCoord_ReadDirect_UByte2:TexCoord_ReadDirect_UByte1); break;
|
||||
case FORMAT_BYTE: m_VertexSize += elements?2:1; WriteCall(elements?TexCoord_ReadDirect_Byte2:TexCoord_ReadDirect_Byte1); break;
|
||||
case FORMAT_USHORT: m_VertexSize += elements?4:2; WriteCall(elements?TexCoord_ReadDirect_UShort2:TexCoord_ReadDirect_UShort1); break;
|
||||
case FORMAT_SHORT: m_VertexSize += elements?4:2; WriteCall(elements?TexCoord_ReadDirect_Short2:TexCoord_ReadDirect_Short1); break;
|
||||
case FORMAT_FLOAT: m_VertexSize += elements?8:4; WriteCall(elements?TexCoord_ReadDirect_Float2:TexCoord_ReadDirect_Float1); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
break;
|
||||
case INDEX8:
|
||||
m_VertexSize += 1;
|
||||
switch (m_VtxAttr.texCoord[i].Format)
|
||||
{
|
||||
case FORMAT_UBYTE: WriteCall(elements?TexCoord_ReadIndex8_UByte2:TexCoord_ReadIndex8_UByte1); break;
|
||||
case FORMAT_BYTE: WriteCall(elements?TexCoord_ReadIndex8_Byte2:TexCoord_ReadIndex8_Byte1); break;
|
||||
case FORMAT_USHORT: WriteCall(elements?TexCoord_ReadIndex8_UShort2:TexCoord_ReadIndex8_UShort1); break;
|
||||
case FORMAT_SHORT: WriteCall(elements?TexCoord_ReadIndex8_Short2:TexCoord_ReadIndex8_Short1); break;
|
||||
case FORMAT_FLOAT: WriteCall(elements?TexCoord_ReadIndex8_Float2:TexCoord_ReadIndex8_Float1); break;
|
||||
default: _assert_(0); break;
|
||||
}
|
||||
break;
|
||||
case INDEX16:
|
||||
m_VertexSize += 2;
|
||||
switch (m_VtxAttr.texCoord[i].Format)
|
||||
{
|
||||
case FORMAT_UBYTE: WriteCall(elements?TexCoord_ReadIndex16_UByte2:TexCoord_ReadIndex16_UByte1); break;
|
||||
case FORMAT_BYTE: WriteCall(elements?TexCoord_ReadIndex16_Byte2:TexCoord_ReadIndex16_Byte1); break;
|
||||
case FORMAT_USHORT: WriteCall(elements?TexCoord_ReadIndex16_UShort2:TexCoord_ReadIndex16_UShort1); break;
|
||||
case FORMAT_SHORT: WriteCall(elements?TexCoord_ReadIndex16_Short2:TexCoord_ReadIndex16_Short1); break;
|
||||
case FORMAT_FLOAT: WriteCall(elements?TexCoord_ReadIndex16_Float2:TexCoord_ReadIndex16_Float1); break;
|
||||
default: _assert_(0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (m_NativeFmt->m_components & (VB_HAS_TEXMTXIDX0 << i)) {
|
||||
if (tc[i] != NOT_PRESENT) {
|
||||
// if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
|
||||
vtx_decl.texcoord_offset[i] = nat_offset;
|
||||
vtx_decl.texcoord_gl_type[i] = VAR_FLOAT;
|
||||
vtx_decl.texcoord_size[i] = 3;
|
||||
nat_offset += 12;
|
||||
WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
|
||||
}
|
||||
else {
|
||||
m_NativeFmt->m_components |= VB_HAS_UV0 << i; // have to include since using now
|
||||
vtx_decl.texcoord_offset[i] = nat_offset;
|
||||
vtx_decl.texcoord_gl_type[i] = VAR_SHORT;
|
||||
vtx_decl.texcoord_size[i] = 4;
|
||||
nat_offset += 8; // still include the texture coordinate, but this time as 6 + 2 bytes
|
||||
WriteCall(TexMtx_Write_Short3);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (tc[i] != NOT_PRESENT) {
|
||||
vtx_decl.texcoord_offset[i] = nat_offset;
|
||||
vtx_decl.texcoord_gl_type[i] = VAR_FLOAT;
|
||||
vtx_decl.texcoord_size[i] = vtx_attr.texCoord[i].Elements ? 2 : 1;
|
||||
nat_offset += 4 * (vtx_attr.texCoord[i].Elements ? 2 : 1);
|
||||
} else {
|
||||
vtx_decl.texcoord_offset[i] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (tc[i] == NOT_PRESENT) {
|
||||
// if there's more tex coords later, have to write a dummy call
|
||||
int j = i + 1;
|
||||
for (; j < 8; ++j) {
|
||||
if (tc[j] != NOT_PRESENT) {
|
||||
WriteCall(TexCoord_Read_Dummy); // important to get indices right!
|
||||
break;
|
||||
}
|
||||
}
|
||||
// tricky!
|
||||
if (j == 8 && !((m_NativeFmt->m_components & VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL << (i + 1)))) {
|
||||
// no more tex coords and tex matrices, so exit loop
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (m_VtxDesc.PosMatIdx) {
|
||||
WriteCall(PosMtx_Write);
|
||||
vtx_decl.posmtx_offset = nat_offset;
|
||||
nat_offset += 4;
|
||||
} else {
|
||||
vtx_decl.posmtx_offset = -1;
|
||||
}
|
||||
|
||||
native_stride = nat_offset;
|
||||
vtx_decl.stride = native_stride;
|
||||
|
||||
#ifdef USE_JIT
|
||||
// End loop here
|
||||
SUB(32, M(&loop_counter), Imm8(1));
|
||||
//SUB(32, R(EBX), Imm8(1));
|
||||
J_CC(CC_NZ, loop_start, true);
|
||||
ABI_EmitEpilogue(4);
|
||||
#endif
|
||||
m_NativeFmt->Initialize(vtx_decl);
|
||||
}
|
||||
|
||||
void VertexLoader::WriteCall(TPipelineFunction func)
|
||||
{
|
||||
#ifdef USE_JIT
|
||||
CALL((void*)func);
|
||||
#else
|
||||
m_PipelineStages[m_numPipelineStages++] = func;
|
||||
#endif
|
||||
}
|
||||
|
||||
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
||||
{
|
||||
DVSTARTPROFILE();
|
||||
|
||||
m_numLoadedVertices += count;
|
||||
|
||||
// Flush if our vertex format is different from the currently set.
|
||||
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
|
||||
{
|
||||
VertexManager::Flush();
|
||||
// Also move the Set() here?
|
||||
}
|
||||
g_nativeVertexFmt = m_NativeFmt;
|
||||
|
||||
if (bpmem.genMode.cullmode == 3 && primitive < 5)
|
||||
{
|
||||
// if cull mode is none, ignore triangles and quads
|
||||
DataSkip(count * m_VertexSize);
|
||||
return;
|
||||
}
|
||||
|
||||
VertexManager::EnableComponents(m_NativeFmt->m_components);
|
||||
|
||||
// Load position and texcoord scale factors.
|
||||
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
|
||||
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
|
||||
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
|
||||
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
|
||||
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
|
||||
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
|
||||
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
|
||||
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
|
||||
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
|
||||
|
||||
pVtxAttr = &m_VtxAttr;
|
||||
posScale = shiftLookup[m_VtxAttr.PosFrac];
|
||||
if (m_NativeFmt->m_components & VB_HAS_UVALL)
|
||||
for (int i = 0; i < 8; i++)
|
||||
tcScale[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
|
||||
for (int i = 0; i < 2; i++)
|
||||
colElements[i] = m_VtxAttr.color[i].Elements;
|
||||
|
||||
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
|
||||
int granularity = 1;
|
||||
switch (primitive) {
|
||||
case 3: // strip .. hm, weird
|
||||
case 4: // fan
|
||||
if (VertexManager::GetRemainingSize() < 3 * native_stride)
|
||||
VertexManager::Flush();
|
||||
break;
|
||||
case 6: // line strip
|
||||
if (VertexManager::GetRemainingSize() < 2 * native_stride)
|
||||
VertexManager::Flush();
|
||||
break;
|
||||
case 0: granularity = 4; break; // quads
|
||||
case 2: granularity = 3; break; // tris
|
||||
case 5: granularity = 2; break; // lines
|
||||
}
|
||||
|
||||
int startv = 0, extraverts = 0;
|
||||
int v = 0;
|
||||
|
||||
while (v < count)
|
||||
{
|
||||
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
|
||||
if (remainingVerts < granularity) {
|
||||
INCSTAT(stats.thisFrame.numBufferSplits);
|
||||
// This buffer full - break current primitive and flush, to switch to the next buffer.
|
||||
u8* plastptr = VertexManager::s_pCurBufferPointer;
|
||||
if (v - startv > 0)
|
||||
VertexManager::AddVertices(primitive, v - startv + extraverts);
|
||||
VertexManager::Flush();
|
||||
// Why does this need to be so complicated?
|
||||
switch (primitive) {
|
||||
case 3: // triangle strip, copy last two vertices
|
||||
// a little trick since we have to keep track of signs
|
||||
if (v & 1) {
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride*3;
|
||||
extraverts = 3;
|
||||
}
|
||||
else {
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
|
||||
VertexManager::s_pCurBufferPointer += native_stride*2;
|
||||
extraverts = 2;
|
||||
}
|
||||
break;
|
||||
case 4: // tri fan, copy first and last vert
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
extraverts = 2;
|
||||
break;
|
||||
case 6: // line strip
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
extraverts = 1;
|
||||
break;
|
||||
default:
|
||||
extraverts = 0;
|
||||
break;
|
||||
}
|
||||
startv = v;
|
||||
}
|
||||
int remainingPrims = remainingVerts / granularity;
|
||||
remainingVerts = remainingPrims * granularity;
|
||||
if (count - v < remainingVerts)
|
||||
remainingVerts = count - v;
|
||||
|
||||
#ifdef USE_JIT
|
||||
if (remainingVerts > 0) {
|
||||
loop_counter = remainingVerts;
|
||||
((void (*)())(void*)m_compiledCode)();
|
||||
}
|
||||
#else
|
||||
for (int s = 0; s < remainingVerts; s++)
|
||||
{
|
||||
tcIndex = 0;
|
||||
colIndex = 0;
|
||||
s_texmtxwrite = s_texmtxread = 0;
|
||||
for (int i = 0; i < m_numPipelineStages; i++)
|
||||
m_PipelineStages[i]();
|
||||
PRIM_LOG("\n");
|
||||
}
|
||||
#endif
|
||||
v += remainingVerts;
|
||||
}
|
||||
|
||||
if (startv < count)
|
||||
VertexManager::AddVertices(primitive, count - startv + extraverts);
|
||||
}
|
||||
|
||||
void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2)
|
||||
{
|
||||
VAT vat;
|
||||
vat.g0.Hex = _group0;
|
||||
vat.g1.Hex = _group1;
|
||||
vat.g2.Hex = _group2;
|
||||
|
||||
m_VtxAttr.PosElements = vat.g0.PosElements;
|
||||
m_VtxAttr.PosFormat = vat.g0.PosFormat;
|
||||
m_VtxAttr.PosFrac = vat.g0.PosFrac;
|
||||
m_VtxAttr.NormalElements = vat.g0.NormalElements;
|
||||
m_VtxAttr.NormalFormat = vat.g0.NormalFormat;
|
||||
m_VtxAttr.color[0].Elements = vat.g0.Color0Elements;
|
||||
m_VtxAttr.color[0].Comp = vat.g0.Color0Comp;
|
||||
m_VtxAttr.color[1].Elements = vat.g0.Color1Elements;
|
||||
m_VtxAttr.color[1].Comp = vat.g0.Color1Comp;
|
||||
m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements;
|
||||
m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat;
|
||||
m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac;
|
||||
m_VtxAttr.ByteDequant = vat.g0.ByteDequant;
|
||||
m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3;
|
||||
|
||||
m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements;
|
||||
m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat;
|
||||
m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac;
|
||||
m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements;
|
||||
m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat;
|
||||
m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac;
|
||||
m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements;
|
||||
m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat;
|
||||
m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac;
|
||||
m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements;
|
||||
m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat;
|
||||
|
||||
m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac;
|
||||
m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements;
|
||||
m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat;
|
||||
m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac;
|
||||
m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements;
|
||||
m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat;
|
||||
m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac;
|
||||
m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements;
|
||||
m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat;
|
||||
m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac;
|
||||
};
|
||||
|
||||
void VertexLoader::AppendToString(std::string *dest) {
|
||||
static const char *posMode[4] = {
|
||||
"Invalid",
|
||||
"Direct",
|
||||
"Idx8",
|
||||
"Idx16",
|
||||
};
|
||||
static const char *posFormats[5] = {
|
||||
"u8", "s8", "u16", "s16", "flt",
|
||||
};
|
||||
dest->append(StringFromFormat("sz: %i skin: %i Pos: %i %s %s Nrm: %i %s %s - %i vtx\n",
|
||||
m_VertexSize, m_VtxDesc.PosMatIdx, m_VtxAttr.PosElements ? 3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat],
|
||||
m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat], m_numLoadedVertices));
|
||||
}
|
@ -1,101 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _VERTEXLOADER_H
|
||||
#define _VERTEXLOADER_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "CPMemory.h"
|
||||
#include "DataReader.h"
|
||||
#include "NativeVertexFormat.h"
|
||||
|
||||
#include "x64Emitter.h"
|
||||
|
||||
class VertexLoaderUID
|
||||
{
|
||||
u32 vid[5];
|
||||
public:
|
||||
VertexLoaderUID() {}
|
||||
void InitFromCurrentState(int vtx_attr_group) {
|
||||
vid[0] = g_VtxDesc.Hex & 0xFFFFFFFF;
|
||||
vid[1] = g_VtxDesc.Hex >> 32;
|
||||
vid[2] = g_VtxAttr[vtx_attr_group].g0.Hex & ~VAT_0_FRACBITS;
|
||||
vid[3] = g_VtxAttr[vtx_attr_group].g1.Hex & ~VAT_1_FRACBITS;
|
||||
vid[4] = g_VtxAttr[vtx_attr_group].g2.Hex & ~VAT_2_FRACBITS;
|
||||
}
|
||||
bool operator < (const VertexLoaderUID &other) const {
|
||||
if (vid[0] < other.vid[0])
|
||||
return true;
|
||||
else if (vid[0] > other.vid[0])
|
||||
return false;
|
||||
for (int i = 1; i < 5; ++i) {
|
||||
if (vid[i] < other.vid[i])
|
||||
return true;
|
||||
else if (vid[i] > other.vid[i])
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class VertexLoader : public Gen::XCodeBlock
|
||||
{
|
||||
public:
|
||||
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
|
||||
~VertexLoader();
|
||||
|
||||
int GetVertexSize() const {return m_VertexSize;}
|
||||
void RunVertices(int vtx_attr_group, int primitive, int count);
|
||||
|
||||
// For debugging / profiling
|
||||
void AppendToString(std::string *dest);
|
||||
|
||||
private:
|
||||
enum
|
||||
{
|
||||
NRM_ZERO = 0,
|
||||
NRM_ONE = 1,
|
||||
NRM_THREE = 3,
|
||||
};
|
||||
|
||||
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
|
||||
|
||||
// GC vertex format
|
||||
TVtxAttr m_VtxAttr; // VAT decoded into easy format
|
||||
TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided.
|
||||
|
||||
// PC vertex format
|
||||
NativeVertexFormat *m_NativeFmt;
|
||||
int native_stride;
|
||||
|
||||
// Pipeline. To be JIT compiled in the future.
|
||||
TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower.
|
||||
int m_numPipelineStages;
|
||||
|
||||
const u8 *m_compiledCode;
|
||||
|
||||
int m_numLoadedVertices;
|
||||
|
||||
void SetVAT(u32 _group0, u32 _group1, u32 _group2);
|
||||
|
||||
void CompileVertexTranslator();
|
||||
|
||||
void WriteCall(TPipelineFunction);
|
||||
};
|
||||
|
||||
#endif
|
@ -1,232 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _VERTEXLOADERCOLOR_H
|
||||
#define _VERTEXLOADERCOLOR_H
|
||||
|
||||
#include "Common.h"
|
||||
#include "VideoCommon.h"
|
||||
#include "LookUpTables.h"
|
||||
#include "VertexLoader.h"
|
||||
#include "VertexLoader_Color.h"
|
||||
#include "NativeVertexWriter.h"
|
||||
|
||||
#define RSHIFT 0
|
||||
#define GSHIFT 8
|
||||
#define BSHIFT 16
|
||||
#define ASHIFT 24
|
||||
|
||||
extern int colIndex;
|
||||
extern int colElements[2];
|
||||
|
||||
inline void _SetCol(u32 val)
|
||||
{
|
||||
*(u32*)VertexManager::s_pCurBufferPointer = val;
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
colIndex++;
|
||||
}
|
||||
|
||||
void _SetCol4444(u16 val)
|
||||
{
|
||||
u32 col = lut4to8[(val>>0)&0xF]<<ASHIFT;
|
||||
col |= lut4to8[(val>>12)&0xF] <<RSHIFT;
|
||||
col |= lut4to8[(val>>8)&0xF] <<GSHIFT;
|
||||
col |= lut4to8[(val>>4)&0xF] <<BSHIFT;
|
||||
_SetCol(col);
|
||||
}
|
||||
|
||||
void _SetCol6666(u32 val)
|
||||
{
|
||||
u32 col = lut6to8[(val>>18)&0x3F] << RSHIFT;
|
||||
col |= lut6to8[(val>>12)&0x3F] << GSHIFT;
|
||||
col |= lut6to8[(val>>6)&0x3F] << BSHIFT;
|
||||
col |= lut6to8[(val>>0)&0x3F] << ASHIFT;
|
||||
_SetCol(col);
|
||||
}
|
||||
|
||||
void _SetCol565(u16 val)
|
||||
{
|
||||
u32 col = lut5to8[(val>>11)&0x1f] << RSHIFT;
|
||||
col |= lut6to8[(val>>5 )&0x3f] << GSHIFT;
|
||||
col |= lut5to8[(val )&0x1f] << BSHIFT;
|
||||
_SetCol(col | (0xFF<<ASHIFT));
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
inline u32 _Read24(u32 iAddress)
|
||||
{
|
||||
u32 col = Memory_Read_U8(iAddress) << RSHIFT; //should just get a pointer to main memory instead of going thru slow memhandler
|
||||
col |= Memory_Read_U8(iAddress+1) << GSHIFT; //we can guarantee that it is reading from main memory
|
||||
col |= Memory_Read_U8(iAddress+2) << BSHIFT;
|
||||
return col | (0xFF<<ASHIFT);
|
||||
}
|
||||
|
||||
inline u32 _Read32(u32 iAddress)
|
||||
{
|
||||
u32 col = Memory_Read_U8(iAddress) << RSHIFT; //should just get a pointer to main memory instead of going thru slow memhandler
|
||||
col |= Memory_Read_U8(iAddress+1) << GSHIFT; //we can guarantee that it is reading from main memory
|
||||
col |= Memory_Read_U8(iAddress+2) << BSHIFT;
|
||||
col |= Memory_Read_U8(iAddress+3) << ASHIFT;
|
||||
return col;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void LOADERDECL Color_ReadDirect_24b_888()
|
||||
{
|
||||
u32 col = DataReadU8()<<RSHIFT;
|
||||
col |= DataReadU8()<<GSHIFT;
|
||||
col |= DataReadU8()<<BSHIFT;
|
||||
_SetCol(col | (0xFF<<ASHIFT));
|
||||
}
|
||||
|
||||
void LOADERDECL Color_ReadDirect_32b_888x(){
|
||||
u32 col = DataReadU8()<<RSHIFT;
|
||||
col |= DataReadU8()<<GSHIFT;
|
||||
col |= DataReadU8()<<BSHIFT;
|
||||
_SetCol(col | (0xFF<<ASHIFT));
|
||||
DataReadU8();
|
||||
}
|
||||
void LOADERDECL Color_ReadDirect_16b_565()
|
||||
{
|
||||
_SetCol565(DataReadU16());
|
||||
}
|
||||
void LOADERDECL Color_ReadDirect_16b_4444()
|
||||
{
|
||||
_SetCol4444(DataReadU16());
|
||||
}
|
||||
void LOADERDECL Color_ReadDirect_24b_6666()
|
||||
{
|
||||
u32 val = DataReadU8()<<16;
|
||||
val|=DataReadU8()<<8;
|
||||
val|=DataReadU8();
|
||||
_SetCol6666(val);
|
||||
}
|
||||
|
||||
// F|RES: i am not 100 percent sure, but the colElements seems to be important for rendering only
|
||||
// at least it fixes mario party 4
|
||||
//
|
||||
// if (colElements[colIndex])
|
||||
// else
|
||||
// col |= 0xFF<<ASHIFT;
|
||||
//
|
||||
void LOADERDECL Color_ReadDirect_32b_8888()
|
||||
{
|
||||
// TODO (mb2): check this
|
||||
u32 col = DataReadU8()<<RSHIFT;
|
||||
col |= DataReadU8()<<GSHIFT;
|
||||
col |= DataReadU8()<<BSHIFT;
|
||||
col |= DataReadU8()<<ASHIFT;
|
||||
|
||||
// "kill" the alpha
|
||||
if (!colElements[colIndex])
|
||||
col |= 0xFF<<ASHIFT;
|
||||
|
||||
_SetCol(col);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void LOADERDECL Color_ReadIndex8_16b_565()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
u16 val = Memory_Read_U16(iAddress);
|
||||
_SetCol565(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_24b_888()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_32b_888x()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR]+colIndex);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_16b_4444()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
u16 val = Memory_Read_U16(iAddress);
|
||||
_SetCol4444(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_24b_6666()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
u32 val = Memory_Read_U8(iAddress+2) |
|
||||
(Memory_Read_U8(iAddress+1)<<8) |
|
||||
(Memory_Read_U8(iAddress)<<16);
|
||||
|
||||
_SetCol6666(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_32b_8888()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read32(iAddress));
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void LOADERDECL Color_ReadIndex16_16b_565()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
u16 val = Memory_Read_U16(iAddress);
|
||||
_SetCol565(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex16_24b_888()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex16_32b_888x()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex16_16b_4444()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
u16 val = Memory_Read_U16(iAddress);
|
||||
_SetCol4444(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex16_24b_6666()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
u32 val = Memory_Read_U8(iAddress+2) |
|
||||
(Memory_Read_U8(iAddress+1)<<8) |
|
||||
(Memory_Read_U8(iAddress)<<16);
|
||||
_SetCol6666(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex16_32b_8888()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read32(iAddress));
|
||||
}
|
||||
|
||||
#endif
|
@ -1,42 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _VERTEXLOADERCOLOR_H
|
||||
#define _VERTEXLOADERCOLOR_H
|
||||
|
||||
void LOADERDECL Color_ReadDirect_24b_888();
|
||||
void LOADERDECL Color_ReadDirect_32b_888x();
|
||||
void LOADERDECL Color_ReadDirect_16b_565();
|
||||
void LOADERDECL Color_ReadDirect_16b_4444();
|
||||
void LOADERDECL Color_ReadDirect_24b_6666();
|
||||
void LOADERDECL Color_ReadDirect_32b_8888();
|
||||
|
||||
void LOADERDECL Color_ReadIndex8_16b_565();
|
||||
void LOADERDECL Color_ReadIndex8_24b_888();
|
||||
void LOADERDECL Color_ReadIndex8_32b_888x();
|
||||
void LOADERDECL Color_ReadIndex8_16b_4444();
|
||||
void LOADERDECL Color_ReadIndex8_24b_6666();
|
||||
void LOADERDECL Color_ReadIndex8_32b_8888();
|
||||
|
||||
void LOADERDECL Color_ReadIndex16_16b_565();
|
||||
void LOADERDECL Color_ReadIndex16_24b_888();
|
||||
void LOADERDECL Color_ReadIndex16_32b_888x();
|
||||
void LOADERDECL Color_ReadIndex16_16b_4444();
|
||||
void LOADERDECL Color_ReadIndex16_24b_6666();
|
||||
void LOADERDECL Color_ReadIndex16_32b_8888();
|
||||
|
||||
#endif
|
@ -1,424 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "Common.h"
|
||||
#include "VideoCommon.h"
|
||||
#include "VertexLoader.h"
|
||||
#include "VertexLoader_Normal.h"
|
||||
#include "NativeVertexWriter.h"
|
||||
|
||||
#define LOG_NORM8() PRIM_LOG("norm: %f %f %f, ", ((s8*)VertexManager::s_pCurBufferPointer)[-3]/127.0f, ((s8*)VertexManager::s_pCurBufferPointer)[-2]/127.0f, ((s8*)VertexManager::s_pCurBufferPointer)[-1]/127.0f);
|
||||
#define LOG_NORM16() PRIM_LOG("norm: %f %f %f, ", ((s16*)VertexManager::s_pCurBufferPointer)[-3]/32767.0f, ((s16*)VertexManager::s_pCurBufferPointer)[-2]/32767.0f, ((s16*)VertexManager::s_pCurBufferPointer)[-1]/32767.0f);
|
||||
#define LOG_NORMF() PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
|
||||
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
|
||||
|
||||
void VertexLoader_Normal::Init(void)
|
||||
{
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
|
||||
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
|
||||
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index8_Byte3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index8_Short3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index8_Short3_Indices1);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index8_Float3_Indices1);
|
||||
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index8_Byte3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index8_Short3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index8_Short3_Indices3);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index8_Float3_Indices3);
|
||||
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index16_Byte3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index16_Short3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index16_Short3_Indices1);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index16_Float3_Indices1);
|
||||
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index16_Byte3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3);
|
||||
}
|
||||
|
||||
unsigned int VertexLoader_Normal::GetSize(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3)
|
||||
{
|
||||
return m_Table[_type][_index3][_elements][_format].gc_size;
|
||||
}
|
||||
|
||||
TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3)
|
||||
{
|
||||
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
|
||||
return pFunc;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// --- Direct ---
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
|
||||
{
|
||||
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
|
||||
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
|
||||
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
|
||||
VertexManager::s_pCurBufferPointer++;
|
||||
LOG_NORM8();
|
||||
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed char)DataReadU8()+0.5f) / 127.5f;
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
|
||||
{
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16()
|
||||
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
|
||||
// ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
|
||||
// ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat()
|
||||
{
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF()
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
|
||||
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
|
||||
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
|
||||
VertexManager::s_pCurBufferPointer++;
|
||||
LOG_NORM8();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// --- Index8 ---
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
|
||||
VertexManager::s_pCurBufferPointer++;
|
||||
// ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed char)Memory_Read_U8(iAddress)+0.5f) / 127.5f;
|
||||
// ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(signed char)Memory_Read_U8(iAddress+1)+0.5f) / 127.5f;
|
||||
// ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(signed char)Memory_Read_U8(iAddress+2)+0.5f) / 127.5f;
|
||||
// VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORM8();
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16();
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF();
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
|
||||
VertexManager::s_pCurBufferPointer++;
|
||||
LOG_NORM8();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
|
||||
*VertexManager::s_pCurBufferPointer++;
|
||||
LOG_NORM8();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// --- Index16 ---
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
|
||||
VertexManager::s_pCurBufferPointer++;
|
||||
LOG_NORM8();
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16();
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF();
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
|
||||
VertexManager::s_pCurBufferPointer++;
|
||||
LOG_NORM8();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i;
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+1);
|
||||
*VertexManager::s_pCurBufferPointer++ = Memory_Read_U8(iAddress+2);
|
||||
VertexManager::s_pCurBufferPointer++;
|
||||
LOG_NORM8();
|
||||
}
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i;
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U16(iAddress);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U16(iAddress+2);
|
||||
((u16*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U16(iAddress+4);
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
LOG_NORM16();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i;
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORMF();
|
||||
}
|
||||
}
|
@ -1,111 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _VERTEXLOADER_NORMAL_H
|
||||
#define _VERTEXLOADER_NORMAL_H
|
||||
|
||||
#include "CommonTypes.h"
|
||||
|
||||
class VertexLoader_Normal
|
||||
{
|
||||
public:
|
||||
|
||||
// Init
|
||||
static void Init(void);
|
||||
|
||||
// GetSize
|
||||
static unsigned int GetSize(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3);
|
||||
|
||||
// GetFunction
|
||||
static TPipelineFunction GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3);
|
||||
|
||||
private:
|
||||
enum ENormalType
|
||||
{
|
||||
NRM_NOT_PRESENT = 0,
|
||||
NRM_DIRECT = 1,
|
||||
NRM_INDEX8 = 2,
|
||||
NRM_INDEX16 = 3,
|
||||
NUM_NRM_TYPE
|
||||
};
|
||||
|
||||
enum ENormalFormat
|
||||
{
|
||||
FORMAT_UBYTE = 0,
|
||||
FORMAT_BYTE = 1,
|
||||
FORMAT_USHORT = 2,
|
||||
FORMAT_SHORT = 3,
|
||||
FORMAT_FLOAT = 4,
|
||||
NUM_NRM_FORMAT
|
||||
};
|
||||
|
||||
enum ENormalElements
|
||||
{
|
||||
NRM_NBT = 0,
|
||||
NRM_NBT3 = 1,
|
||||
NUM_NRM_ELEMENTS
|
||||
};
|
||||
|
||||
enum ENormalIndices
|
||||
{
|
||||
NRM_INDICES1 = 0,
|
||||
NRM_INDICES3 = 1,
|
||||
NUM_NRM_INDICES
|
||||
};
|
||||
|
||||
struct Set {
|
||||
Set() {}
|
||||
Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {}
|
||||
int gc_size;
|
||||
TPipelineFunction function;
|
||||
// int pc_size;
|
||||
};
|
||||
|
||||
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
|
||||
|
||||
// direct
|
||||
static void LOADERDECL Normal_DirectByte();
|
||||
static void LOADERDECL Normal_DirectShort();
|
||||
static void LOADERDECL Normal_DirectFloat();
|
||||
static void LOADERDECL Normal_DirectByte3();
|
||||
static void LOADERDECL Normal_DirectShort3();
|
||||
static void LOADERDECL Normal_DirectFloat3();
|
||||
|
||||
// index8
|
||||
static void LOADERDECL Normal_Index8_Byte();
|
||||
static void LOADERDECL Normal_Index8_Short();
|
||||
static void LOADERDECL Normal_Index8_Float();
|
||||
static void LOADERDECL Normal_Index8_Byte3_Indices1();
|
||||
static void LOADERDECL Normal_Index8_Short3_Indices1();
|
||||
static void LOADERDECL Normal_Index8_Float3_Indices1();
|
||||
static void LOADERDECL Normal_Index8_Byte3_Indices3();
|
||||
static void LOADERDECL Normal_Index8_Short3_Indices3();
|
||||
static void LOADERDECL Normal_Index8_Float3_Indices3();
|
||||
|
||||
// index16
|
||||
static void LOADERDECL Normal_Index16_Byte();
|
||||
static void LOADERDECL Normal_Index16_Short();
|
||||
static void LOADERDECL Normal_Index16_Float();
|
||||
static void LOADERDECL Normal_Index16_Byte3_Indices1();
|
||||
static void LOADERDECL Normal_Index16_Short3_Indices1();
|
||||
static void LOADERDECL Normal_Index16_Float3_Indices1();
|
||||
static void LOADERDECL Normal_Index16_Byte3_Indices3();
|
||||
static void LOADERDECL Normal_Index16_Short3_Indices3();
|
||||
static void LOADERDECL Normal_Index16_Float3_Indices3();
|
||||
};
|
||||
|
||||
#endif
|
@ -1,242 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef VERTEXLOADER_POSITION_H
|
||||
#define VERTEXLOADER_POSITION_H
|
||||
|
||||
#include "Common.h"
|
||||
#include "VideoCommon.h"
|
||||
#include "VertexLoader.h"
|
||||
#include "VertexLoader_Position.h"
|
||||
#include "NativeVertexWriter.h"
|
||||
|
||||
extern float posScale;
|
||||
extern TVtxAttr *pVtxAttr;
|
||||
|
||||
#define LOG_VTX() PRIM_LOG("vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]);
|
||||
|
||||
// Thoughts on the implementation of a vertex loader compiler.
|
||||
// s_pCurBufferPointer should definitely be in a register.
|
||||
// Could load the position scale factor in XMM7, for example.
|
||||
|
||||
// The pointer inside DataReadU8 in another.
|
||||
// Let's check out Pos_ReadDirect_UByte(). For Byte, replace MOVZX with MOVSX.
|
||||
|
||||
/*
|
||||
MOVZX(32, R(EAX), MOffset(ESI, 0));
|
||||
MOVZX(32, R(EBX), MOffset(ESI, 1));
|
||||
MOVZX(32, R(ECX), MOffset(ESI, 2));
|
||||
MOVD(XMM0, R(EAX));
|
||||
MOVD(XMM1, R(EBX));
|
||||
MOVD(XMM2, R(ECX));
|
||||
CVTDQ2PS(XMM0, XMM0);
|
||||
CVTDQ2PS(XMM1, XMM1);
|
||||
CVTDQ2PS(XMM2, XMM2);
|
||||
MULSS(XMM0, XMM7);
|
||||
MULSS(XMM1, XMM7);
|
||||
MULSS(XMM2, XMM7);
|
||||
MOVSS(MOffset(EDI, 0), XMM0);
|
||||
MOVSS(MOffset(EDI, 4), XMM1);
|
||||
MOVSS(MOffset(EDI, 8), XMM2);
|
||||
|
||||
Alternatively, lookup table:
|
||||
MOVZX(32, R(EAX), MOffset(ESI, 0));
|
||||
MOVZX(32, R(EBX), MOffset(ESI, 1));
|
||||
MOVZX(32, R(ECX), MOffset(ESI, 2));
|
||||
MOV(32, R(EAX), MComplex(LUTREG, EAX, 4));
|
||||
MOV(32, R(EBX), MComplex(LUTREG, EBX, 4));
|
||||
MOV(32, R(ECX), MComplex(LUTREG, ECX, 4));
|
||||
MOV(MOffset(EDI, 0), XMM0);
|
||||
MOV(MOffset(EDI, 4), XMM1);
|
||||
MOV(MOffset(EDI, 8), XMM2);
|
||||
|
||||
SSE4:
|
||||
PINSRB(XMM0, MOffset(ESI, 0), 0);
|
||||
PINSRB(XMM0, MOffset(ESI, 1), 4);
|
||||
PINSRB(XMM0, MOffset(ESI, 2), 8);
|
||||
CVTDQ2PS(XMM0, XMM0);
|
||||
<two unpacks here to sign extend>
|
||||
MULPS(XMM0, XMM7);
|
||||
MOVUPS(MOffset(EDI, 0), XMM0);
|
||||
|
||||
*/
|
||||
|
||||
// ==============================================================================
|
||||
// Direct
|
||||
// ==============================================================================
|
||||
void LOADERDECL Pos_ReadDirect_UByte()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * posScale;
|
||||
if (pVtxAttr->PosElements)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)DataReadU8() * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadDirect_Byte()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * posScale;
|
||||
if (pVtxAttr->PosElements)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s8)DataReadU8() * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadDirect_UShort()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * posScale;
|
||||
if (pVtxAttr->PosElements)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)DataReadU16() * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadDirect_Short()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * posScale;
|
||||
if (pVtxAttr->PosElements)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s16)DataReadU16() * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadDirect_Float()
|
||||
{
|
||||
// No need to use floating point here.
|
||||
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
|
||||
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
|
||||
if (pVtxAttr->PosElements)
|
||||
((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
|
||||
#define Pos_ReadIndex_Byte(T) { \
|
||||
u32 iAddress = arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Memory_Read_U8(iAddress)) * posScale; \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Memory_Read_U8(iAddress+1)) * posScale; \
|
||||
if (pVtxAttr->PosElements) \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Memory_Read_U8(iAddress+2)) * posScale; \
|
||||
else \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; \
|
||||
LOG_VTX(); \
|
||||
VertexManager::s_pCurBufferPointer += 12; \
|
||||
}
|
||||
|
||||
#define Pos_ReadIndex_Short(T) { \
|
||||
u32 iAddress = arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Memory_Read_U16(iAddress)) * posScale; \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Memory_Read_U16(iAddress+2)) * posScale; \
|
||||
if (pVtxAttr->PosElements) \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Memory_Read_U16(iAddress+4)) * posScale; \
|
||||
else \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; \
|
||||
LOG_VTX(); \
|
||||
VertexManager::s_pCurBufferPointer += 12; \
|
||||
}
|
||||
|
||||
#define Pos_ReadIndex_Float() { \
|
||||
u32 iAddress = arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]); \
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); \
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); \
|
||||
if (pVtxAttr->PosElements) \
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); \
|
||||
else \
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; \
|
||||
LOG_VTX(); \
|
||||
VertexManager::s_pCurBufferPointer += 12; \
|
||||
}
|
||||
|
||||
// ==============================================================================
|
||||
// Index 8
|
||||
// ==============================================================================
|
||||
void LOADERDECL Pos_ReadIndex8_UByte()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
Pos_ReadIndex_Byte(u8);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex8_Byte()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
Pos_ReadIndex_Byte(s8);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex8_UShort()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
Pos_ReadIndex_Short(u16);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex8_Short()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
Pos_ReadIndex_Short(s16);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex8_Float()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
Pos_ReadIndex_Float();
|
||||
}
|
||||
|
||||
// ==============================================================================
|
||||
// Index 16
|
||||
// ==============================================================================
|
||||
|
||||
void LOADERDECL Pos_ReadIndex16_UByte(){
|
||||
u16 Index = DataReadU16();
|
||||
Pos_ReadIndex_Byte(u8);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex16_Byte(){
|
||||
u16 Index = DataReadU16();
|
||||
Pos_ReadIndex_Byte(s8);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex16_UShort(){
|
||||
u16 Index = DataReadU16();
|
||||
Pos_ReadIndex_Short(u16);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex16_Short()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
Pos_ReadIndex_Short(s16);
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadIndex16_Float()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
Pos_ReadIndex_Float();
|
||||
}
|
||||
|
||||
#endif
|
@ -1,39 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef VERTEXLOADER_POSITION_H
|
||||
#define VERTEXLOADER_POSITION_H
|
||||
|
||||
void LOADERDECL Pos_ReadDirect_UByte();
|
||||
void LOADERDECL Pos_ReadDirect_Byte();
|
||||
void LOADERDECL Pos_ReadDirect_UShort();
|
||||
void LOADERDECL Pos_ReadDirect_Short();
|
||||
void LOADERDECL Pos_ReadDirect_Float();
|
||||
|
||||
void LOADERDECL Pos_ReadIndex8_UByte();
|
||||
void LOADERDECL Pos_ReadIndex8_Byte();
|
||||
void LOADERDECL Pos_ReadIndex8_UShort();
|
||||
void LOADERDECL Pos_ReadIndex8_Short();
|
||||
void LOADERDECL Pos_ReadIndex8_Float();
|
||||
|
||||
void LOADERDECL Pos_ReadIndex16_UByte();
|
||||
void LOADERDECL Pos_ReadIndex16_Byte();
|
||||
void LOADERDECL Pos_ReadIndex16_UShort();
|
||||
void LOADERDECL Pos_ReadIndex16_Short();
|
||||
void LOADERDECL Pos_ReadIndex16_Float();
|
||||
|
||||
#endif
|
@ -1,338 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef VERTEXLOADER_TEXCOORD_H
|
||||
#define VERTEXLOADER_TEXCOORD_H
|
||||
|
||||
#include "Common.h"
|
||||
#include "VideoCommon.h"
|
||||
#include "VertexLoader.h"
|
||||
#include "VertexLoader_Position.h"
|
||||
#include "NativeVertexWriter.h"
|
||||
|
||||
#define LOG_TEX1() PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]);
|
||||
#define LOG_TEX2() PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]);
|
||||
|
||||
extern int tcIndex;
|
||||
extern float tcScale[8];
|
||||
|
||||
void LOADERDECL TexCoord_Read_Dummy()
|
||||
{
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_UByte1()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadDirect_UByte2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_Byte1()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadDirect_Byte2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_UShort1()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadDirect_UShort2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_Short1()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadDirect_Short2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_Float1()
|
||||
{
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadDirect_Float2()
|
||||
{
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
// ==================================================================================
|
||||
void LOADERDECL TexCoord_ReadIndex8_UByte1()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex8_UByte2()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex8_Byte1()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex8_Byte2()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex8_UShort1()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex8_UShort2()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex8_Short1()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex8_Short2()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex8_Float1()
|
||||
{
|
||||
u16 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex8_Float2()
|
||||
{
|
||||
u16 Index = DataReadU8();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4);
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
// ==================================================================================
|
||||
void LOADERDECL TexCoord_ReadIndex16_UByte1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex16_UByte2()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex16_Byte1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex16_Byte2()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)Memory_Read_U8(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)Memory_Read_U8(iAddress+1) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex16_UShort1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex16_UShort2()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex16_Short1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex16_Short2()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Memory_Read_U16(iAddress) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Memory_Read_U16(iAddress+2) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex16_Float1()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadIndex16_Float2()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
|
||||
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress + 4);
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
#endif
|
@ -1,53 +0,0 @@
|
||||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef VERTEXLOADER_TEXCOORD_H
|
||||
#define VERTEXLOADER_TEXCOORD_H
|
||||
|
||||
void LOADERDECL TexCoord_Read_Dummy();
|
||||
void LOADERDECL TexCoord_ReadDirect_UByte1();
|
||||
void LOADERDECL TexCoord_ReadDirect_UByte2();
|
||||
void LOADERDECL TexCoord_ReadDirect_Byte1();
|
||||
void LOADERDECL TexCoord_ReadDirect_Byte2();
|
||||
void LOADERDECL TexCoord_ReadDirect_UShort1();
|
||||
void LOADERDECL TexCoord_ReadDirect_UShort2();
|
||||
void LOADERDECL TexCoord_ReadDirect_Short1();
|
||||
void LOADERDECL TexCoord_ReadDirect_Short2();
|
||||
void LOADERDECL TexCoord_ReadDirect_Float1();
|
||||
void LOADERDECL TexCoord_ReadDirect_Float2();
|
||||
void LOADERDECL TexCoord_ReadIndex8_UByte1();
|
||||
void LOADERDECL TexCoord_ReadIndex8_UByte2();
|
||||
void LOADERDECL TexCoord_ReadIndex8_Byte1();
|
||||
void LOADERDECL TexCoord_ReadIndex8_Byte2();
|
||||
void LOADERDECL TexCoord_ReadIndex8_UShort1();
|
||||
void LOADERDECL TexCoord_ReadIndex8_UShort2();
|
||||
void LOADERDECL TexCoord_ReadIndex8_Short1();
|
||||
void LOADERDECL TexCoord_ReadIndex8_Short2();
|
||||
void LOADERDECL TexCoord_ReadIndex8_Float1();
|
||||
void LOADERDECL TexCoord_ReadIndex8_Float2();
|
||||
void LOADERDECL TexCoord_ReadIndex16_UByte1();
|
||||
void LOADERDECL TexCoord_ReadIndex16_UByte2();
|
||||
void LOADERDECL TexCoord_ReadIndex16_Byte1();
|
||||
void LOADERDECL TexCoord_ReadIndex16_Byte2();
|
||||
void LOADERDECL TexCoord_ReadIndex16_UShort1();
|
||||
void LOADERDECL TexCoord_ReadIndex16_UShort2();
|
||||
void LOADERDECL TexCoord_ReadIndex16_Short1();
|
||||
void LOADERDECL TexCoord_ReadIndex16_Short2();
|
||||
void LOADERDECL TexCoord_ReadIndex16_Float1();
|
||||
void LOADERDECL TexCoord_ReadIndex16_Float2();
|
||||
|
||||
#endif
|
@ -1,473 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright (c) 2001 Advanced Micro Devices, Inc.
|
||||
|
||||
LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
|
||||
EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
|
||||
NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
|
||||
PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
|
||||
DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
|
||||
BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
|
||||
INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
|
||||
OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
|
||||
OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
|
||||
NOT APPLY TO YOU.
|
||||
|
||||
AMD does not assume any responsibility for any errors which may appear in the
|
||||
Materials nor any responsibility to support or update the Materials. AMD retains
|
||||
the right to make changes to its test specifications at any time, without notice.
|
||||
|
||||
NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
|
||||
further information, software, technical information, know-how, or show-how
|
||||
available to you.
|
||||
|
||||
So that all may benefit from your experience, please report any problems
|
||||
or suggestions about this software to 3dsdk.support@amd.com
|
||||
|
||||
AMD Developer Technologies, M/S 585
|
||||
Advanced Micro Devices, Inc.
|
||||
5900 E. Ben White Blvd.
|
||||
Austin, TX 78741
|
||||
3dsdk.support@amd.com
|
||||
******************************************************************************/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/*****************************************************************************
|
||||
MEMCPY_AMD.CPP
|
||||
******************************************************************************/
|
||||
|
||||
// Very optimized memcpy() routine for AMD Athlon and Duron family.
|
||||
// This code uses any of FOUR different basic copy methods, depending
|
||||
// on the transfer size.
|
||||
// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
|
||||
// "Streaming Store"), and also uses the software prefetch instructions,
|
||||
// be sure you're running on Athlon/Duron or other recent CPU before calling!
|
||||
|
||||
#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
|
||||
// The smallest copy uses the X86 "movsd" instruction, in an optimized
|
||||
// form which is an "unrolled loop".
|
||||
|
||||
#define IN_CACHE_COPY 2 * 1024 // upper limit for movq/movq copy w/SW prefetch
|
||||
// Next is a copy that uses the MMX registers to copy 8 bytes at a time,
|
||||
// also using the "unrolled loop" optimization. This code uses
|
||||
// the software prefetch instruction to get the data into the cache.
|
||||
|
||||
#define UNCACHED_COPY 4 * 1024 // upper limit for movq/movntq w/SW prefetch
|
||||
// For larger blocks, which will spill beyond the cache, it's faster to
|
||||
// use the Streaming Store instruction MOVNTQ. This write instruction
|
||||
// bypasses the cache and writes straight to main memory. This code also
|
||||
// uses the software prefetch instruction to pre-read the data.
|
||||
// USE 64 * 1024 FOR THIS VALUE IF YOU'RE ALWAYS FILLING A "CLEAN CACHE"
|
||||
|
||||
#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
|
||||
#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
|
||||
// For the largest size blocks, a special technique called Block Prefetch
|
||||
// can be used to accelerate the read operations. Block Prefetch reads
|
||||
// one address per cache line, for a series of cache lines, in a short loop.
|
||||
// This is faster than using software prefetch. The technique is great for
|
||||
// getting maximum read bandwidth, especially in DDR memory systems.
|
||||
|
||||
//#include <stddef.h>
|
||||
|
||||
// Inline assembly syntax for use with Visual C++
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__x86_64__) && !defined(_M_X64)
|
||||
|
||||
void * memcpy_amd(void *dest, const void *src, size_t n)
|
||||
{
|
||||
__asm {
|
||||
mov ecx, [n] ; number of bytes to copy
|
||||
mov edi, [dest] ; destination
|
||||
mov esi, [src] ; source
|
||||
mov ebx, ecx ; keep a copy of count
|
||||
|
||||
cld
|
||||
cmp ecx, TINY_BLOCK_COPY
|
||||
jb $memcpy_ic_3 ; tiny? skip mmx copy
|
||||
|
||||
cmp ecx, 32*1024 ; do not align between 32k-64k because
|
||||
jbe $memcpy_do_align ; it appears to be slower
|
||||
cmp ecx, 64*1024
|
||||
jbe $memcpy_align_done
|
||||
$memcpy_do_align:
|
||||
mov ecx, 8 ; a trick that is faster than rep movsb...
|
||||
sub ecx, edi ; align destination to qword
|
||||
and ecx, 111b ; get the low bits
|
||||
sub ebx, ecx ; update copy count
|
||||
neg ecx ; set up to jump into the array
|
||||
add ecx, offset $memcpy_align_done
|
||||
jmp ecx ; jump to array of movsb''s
|
||||
|
||||
align 4
|
||||
movsb
|
||||
movsb
|
||||
movsb
|
||||
movsb
|
||||
movsb
|
||||
movsb
|
||||
movsb
|
||||
movsb
|
||||
|
||||
$memcpy_align_done: ; destination is dword aligned
|
||||
mov ecx, ebx ; number of bytes left to copy
|
||||
shr ecx, 6 ; get 64-byte block count
|
||||
jz $memcpy_ic_2 ; finish the last few bytes
|
||||
|
||||
cmp ecx, IN_CACHE_COPY/64 ; too big 4 cache? use uncached copy
|
||||
jae $memcpy_uc_test
|
||||
|
||||
// This is small block copy that uses the MMX registers to copy 8 bytes
|
||||
// at a time. It uses the "unrolled loop" optimization, and also uses
|
||||
// the software prefetch instruction to get the data into the cache.
|
||||
align 16
|
||||
$memcpy_ic_1: ; 64-byte block copies, in-cache copy
|
||||
|
||||
prefetchnta [esi + (200*64/34+192)] ; start reading ahead
|
||||
|
||||
movq mm0, [esi+0] ; read 64 bits
|
||||
movq mm1, [esi+8]
|
||||
movq [edi+0], mm0 ; write 64 bits
|
||||
movq [edi+8], mm1 ; note: the normal movq writes the
|
||||
movq mm2, [esi+16] ; data to cache; a cache line will be
|
||||
movq mm3, [esi+24] ; allocated as needed, to store the data
|
||||
movq [edi+16], mm2
|
||||
movq [edi+24], mm3
|
||||
movq mm0, [esi+32]
|
||||
movq mm1, [esi+40]
|
||||
movq [edi+32], mm0
|
||||
movq [edi+40], mm1
|
||||
movq mm2, [esi+48]
|
||||
movq mm3, [esi+56]
|
||||
movq [edi+48], mm2
|
||||
movq [edi+56], mm3
|
||||
|
||||
add esi, 64 ; update source pointer
|
||||
add edi, 64 ; update destination pointer
|
||||
dec ecx ; count down
|
||||
jnz $memcpy_ic_1 ; last 64-byte block?
|
||||
|
||||
$memcpy_ic_2:
|
||||
mov ecx, ebx ; has valid low 6 bits of the byte count
|
||||
$memcpy_ic_3:
|
||||
shr ecx, 2 ; dword count
|
||||
and ecx, 1111b ; only look at the "remainder" bits
|
||||
neg ecx ; set up to jump into the array
|
||||
add ecx, offset $memcpy_last_few
|
||||
jmp ecx ; jump to array of movsd''s
|
||||
|
||||
$memcpy_uc_test:
|
||||
cmp ecx, UNCACHED_COPY/64 ; big enough? use block prefetch copy
|
||||
jae $memcpy_bp_1
|
||||
|
||||
$memcpy_64_test:
|
||||
or ecx, ecx ; tail end of block prefetch will jump here
|
||||
jz $memcpy_ic_2 ; no more 64-byte blocks left
|
||||
|
||||
// For larger blocks, which will spill beyond the cache, it's faster to
|
||||
// use the Streaming Store instruction MOVNTQ. This write instruction
|
||||
// bypasses the cache and writes straight to main memory. This code also
|
||||
// uses the software prefetch instruction to pre-read the data.
|
||||
align 16
|
||||
$memcpy_uc_1: ; 64-byte blocks, uncached copy
|
||||
|
||||
prefetchnta [esi + (200*64/34+192)] ; start reading ahead
|
||||
|
||||
movq mm0,[esi+0] ; read 64 bits
|
||||
add edi,64 ; update destination pointer
|
||||
movq mm1,[esi+8]
|
||||
add esi,64 ; update source pointer
|
||||
movq mm2,[esi-48]
|
||||
movntq [edi-64], mm0 ; write 64 bits, bypassing the cache
|
||||
movq mm0,[esi-40] ; note: movntq also prevents the CPU
|
||||
movntq [edi-56], mm1 ; from READING the destination address
|
||||
movq mm1,[esi-32] ; into the cache, only to be over-written
|
||||
movntq [edi-48], mm2 ; so that also helps performance
|
||||
movq mm2,[esi-24]
|
||||
movntq [edi-40], mm0
|
||||
movq mm0,[esi-16]
|
||||
movntq [edi-32], mm1
|
||||
movq mm1,[esi-8]
|
||||
movntq [edi-24], mm2
|
||||
movntq [edi-16], mm0
|
||||
dec ecx
|
||||
movntq [edi-8], mm1
|
||||
jnz $memcpy_uc_1 ; last 64-byte block?
|
||||
|
||||
jmp $memcpy_ic_2 ; almost done
|
||||
|
||||
// For the largest size blocks, a special technique called Block Prefetch
|
||||
// can be used to accelerate the read operations. Block Prefetch reads
|
||||
// one address per cache line, for a series of cache lines, in a short loop.
|
||||
// This is faster than using software prefetch. The technique is great for
|
||||
// getting maximum read bandwidth, especially in DDR memory systems.
|
||||
$memcpy_bp_1: ; large blocks, block prefetch copy
|
||||
|
||||
cmp ecx, CACHEBLOCK ; big enough to run another prefetch loop?
|
||||
jl $memcpy_64_test ; no, back to regular uncached copy
|
||||
|
||||
mov eax, CACHEBLOCK / 2 ; block prefetch loop, unrolled 2X
|
||||
add esi, CACHEBLOCK * 64 ; move to the top of the block
|
||||
align 16
|
||||
$memcpy_bp_2:
|
||||
mov edx, [esi-64] ; grab one address per cache line
|
||||
mov edx, [esi-128] ; grab one address per cache line
|
||||
sub esi, 128 ; go reverse order to suppress HW prefetcher
|
||||
dec eax ; count down the cache lines
|
||||
jnz $memcpy_bp_2 ; keep grabbing more lines into cache
|
||||
|
||||
mov eax, CACHEBLOCK ; now that it is in cache, do the copy
|
||||
align 16
|
||||
$memcpy_bp_3:
|
||||
movq mm0, [esi ] ; read 64 bits
|
||||
movq mm1, [esi+ 8]
|
||||
movq mm2, [esi+16]
|
||||
movq mm3, [esi+24]
|
||||
movq mm4, [esi+32]
|
||||
movq mm5, [esi+40]
|
||||
movq mm6, [esi+48]
|
||||
movq mm7, [esi+56]
|
||||
add esi, 64 ; update source pointer
|
||||
movntq [edi ], mm0 ; write 64 bits, bypassing cache
|
||||
movntq [edi+ 8], mm1 ; note: movntq also prevents the CPU
|
||||
movntq [edi+16], mm2 ; from READING the destination address
|
||||
movntq [edi+24], mm3 ; into the cache, only to be over-written,
|
||||
movntq [edi+32], mm4 ; so that also helps performance
|
||||
movntq [edi+40], mm5
|
||||
movntq [edi+48], mm6
|
||||
movntq [edi+56], mm7
|
||||
add edi, 64 ; update dest pointer
|
||||
|
||||
dec eax ; count down
|
||||
|
||||
jnz $memcpy_bp_3 ; keep copying
|
||||
sub ecx, CACHEBLOCK ; update the 64-byte block count
|
||||
jmp $memcpy_bp_1 ; keep processing chunks
|
||||
|
||||
// The smallest copy uses the X86 "movsd" instruction, in an optimized
|
||||
// form which is an "unrolled loop". Then it handles the last few bytes.
|
||||
align 4
|
||||
movsd
|
||||
movsd ; perform last 1-15 dword copies
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd ; perform last 1-7 dword copies
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
movsd
|
||||
|
||||
$memcpy_last_few: ; dword aligned from before movsd''s
|
||||
mov ecx, ebx ; has valid low 2 bits of the byte count
|
||||
and ecx, 11b ; the last few cows must come home
|
||||
jz $memcpy_final ; no more, lets leave
|
||||
rep movsb ; the last 1, 2, or 3 bytes
|
||||
|
||||
$memcpy_final:
|
||||
emms ; clean up the MMX state
|
||||
sfence ; flush the write buffer
|
||||
mov eax, [dest] ; ret value = destination pointer
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// mmx memcmp implementation, size has to be a multiple of 8
|
||||
// returns 0 is equal, nonzero value if not equal
|
||||
// ~10 times faster than standard memcmp
|
||||
// (zerofrog)
|
||||
unsigned char memcmp_mmx(const void* src1, const void* src2, int cmpsize)
|
||||
{
|
||||
assert( (cmpsize&7) == 0 );
|
||||
|
||||
__asm {
|
||||
push esi
|
||||
mov ecx, cmpsize
|
||||
mov edx, src1
|
||||
mov esi, src2
|
||||
|
||||
cmp ecx, 32
|
||||
jl Done4
|
||||
|
||||
// custom test first 8 to make sure things are ok
|
||||
movq mm0, [esi]
|
||||
movq mm1, [esi+8]
|
||||
pcmpeqd mm0, [edx]
|
||||
pcmpeqd mm1, [edx+8]
|
||||
pand mm0, mm1
|
||||
movq mm2, [esi+16]
|
||||
pmovmskb eax, mm0
|
||||
movq mm3, [esi+24]
|
||||
|
||||
// check if eq
|
||||
cmp eax, 0xff
|
||||
je NextComp
|
||||
mov eax, 1
|
||||
jmp End
|
||||
|
||||
NextComp:
|
||||
pcmpeqd mm2, [edx+16]
|
||||
pcmpeqd mm3, [edx+24]
|
||||
pand mm2, mm3
|
||||
pmovmskb eax, mm2
|
||||
|
||||
sub ecx, 32
|
||||
add esi, 32
|
||||
add edx, 32
|
||||
|
||||
// check if eq
|
||||
cmp eax, 0xff
|
||||
je ContinueTest
|
||||
mov eax, 1
|
||||
jmp End
|
||||
|
||||
cmp ecx, 64
|
||||
jl Done8
|
||||
|
||||
Cmp8:
|
||||
movq mm0, [esi]
|
||||
movq mm1, [esi+8]
|
||||
movq mm2, [esi+16]
|
||||
movq mm3, [esi+24]
|
||||
movq mm4, [esi+32]
|
||||
movq mm5, [esi+40]
|
||||
movq mm6, [esi+48]
|
||||
movq mm7, [esi+56]
|
||||
pcmpeqd mm0, [edx]
|
||||
pcmpeqd mm1, [edx+8]
|
||||
pcmpeqd mm2, [edx+16]
|
||||
pcmpeqd mm3, [edx+24]
|
||||
pand mm0, mm1
|
||||
pcmpeqd mm4, [edx+32]
|
||||
pand mm0, mm2
|
||||
pcmpeqd mm5, [edx+40]
|
||||
pand mm0, mm3
|
||||
pcmpeqd mm6, [edx+48]
|
||||
pand mm0, mm4
|
||||
pcmpeqd mm7, [edx+56]
|
||||
pand mm0, mm5
|
||||
pand mm0, mm6
|
||||
pand mm0, mm7
|
||||
pmovmskb eax, mm0
|
||||
|
||||
// check if eq
|
||||
cmp eax, 0xff
|
||||
je Continue
|
||||
mov eax, 1
|
||||
jmp End
|
||||
|
||||
Continue:
|
||||
sub ecx, 64
|
||||
add esi, 64
|
||||
add edx, 64
|
||||
ContinueTest:
|
||||
cmp ecx, 64
|
||||
jge Cmp8
|
||||
|
||||
Done8:
|
||||
test ecx, 0x20
|
||||
jz Done4
|
||||
movq mm0, [esi]
|
||||
movq mm1, [esi+8]
|
||||
movq mm2, [esi+16]
|
||||
movq mm3, [esi+24]
|
||||
pcmpeqd mm0, [edx]
|
||||
pcmpeqd mm1, [edx+8]
|
||||
pcmpeqd mm2, [edx+16]
|
||||
pcmpeqd mm3, [edx+24]
|
||||
pand mm0, mm1
|
||||
pand mm0, mm2
|
||||
pand mm0, mm3
|
||||
pmovmskb eax, mm0
|
||||
sub ecx, 32
|
||||
add esi, 32
|
||||
add edx, 32
|
||||
|
||||
// check if eq
|
||||
cmp eax, 0xff
|
||||
je Done4
|
||||
mov eax, 1
|
||||
jmp End
|
||||
|
||||
Done4:
|
||||
cmp ecx, 24
|
||||
jne Done2
|
||||
movq mm0, [esi]
|
||||
movq mm1, [esi+8]
|
||||
movq mm2, [esi+16]
|
||||
pcmpeqd mm0, [edx]
|
||||
pcmpeqd mm1, [edx+8]
|
||||
pcmpeqd mm2, [edx+16]
|
||||
pand mm0, mm1
|
||||
pand mm0, mm2
|
||||
pmovmskb eax, mm0
|
||||
|
||||
// check if eq
|
||||
cmp eax, 0xff
|
||||
setne al
|
||||
jmp End
|
||||
|
||||
Done2:
|
||||
cmp ecx, 16
|
||||
jne Done1
|
||||
|
||||
movq mm0, [esi]
|
||||
movq mm1, [esi+8]
|
||||
pcmpeqd mm0, [edx]
|
||||
pcmpeqd mm1, [edx+8]
|
||||
pand mm0, mm1
|
||||
pmovmskb eax, mm0
|
||||
|
||||
// check if eq
|
||||
cmp eax, 0xff
|
||||
setne al
|
||||
jmp End
|
||||
|
||||
Done1:
|
||||
cmp ecx, 8
|
||||
jne Done
|
||||
|
||||
mov eax, [esi]
|
||||
mov esi, [esi+4]
|
||||
cmp eax, [edx]
|
||||
je Next
|
||||
mov eax, 1
|
||||
jmp End
|
||||
|
||||
Next:
|
||||
cmp esi, [edx+4]
|
||||
setne al
|
||||
jmp End
|
||||
|
||||
Done:
|
||||
xor eax, eax
|
||||
|
||||
End:
|
||||
pop esi
|
||||
emms
|
||||
}
|
||||
}
|
||||
|
||||
#else // _MSC_VER
|
||||
// assume gcc or mingw or win x64
|
||||
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
|
||||
void * memcpy_amd(void *dest, const void *src, size_t n)
|
||||
{
|
||||
memcpy(dest, src, n);
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user