mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-23 06:09:50 -06:00
Merge branch 'master' into GLSL-master
Conflicts: CMakeLists.txt Source/Core/DolphinWX/CMakeLists.txt Source/Core/DolphinWX/Src/GLInterface.h Source/Core/VideoCommon/Src/PixelShaderGen.cpp Source/Core/VideoCommon/Src/TextureCacheBase.cpp Source/Core/VideoCommon/Src/VertexManagerBase.cpp Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj.filters Source/Plugins/Plugin_VideoOGL/Src/GLUtil.h Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp Source/Plugins/Plugin_VideoOGL/Src/main.cpp
This commit is contained in:
@ -3,7 +3,6 @@ set(SRCS Src/BPFunctions.cpp
|
||||
Src/BPStructs.cpp
|
||||
Src/CPMemory.cpp
|
||||
Src/CommandProcessor.cpp
|
||||
Src/DLCache.cpp
|
||||
Src/Debugger.cpp
|
||||
Src/Fifo.cpp
|
||||
Src/FPSCounter.cpp
|
||||
@ -17,6 +16,7 @@ set(SRCS Src/BPFunctions.cpp
|
||||
Src/OpcodeDecoding.cpp
|
||||
Src/OpenCL.cpp
|
||||
Src/OpenCL/OCLTextureDecoder.cpp
|
||||
Src/PerfQueryBase.cpp
|
||||
Src/PixelEngine.cpp
|
||||
Src/PixelShaderGen.cpp
|
||||
Src/PixelShaderManager.cpp
|
||||
@ -24,7 +24,6 @@ set(SRCS Src/BPFunctions.cpp
|
||||
Src/Statistics.cpp
|
||||
Src/TextureCacheBase.cpp
|
||||
Src/TextureConversionShader.cpp
|
||||
Src/TextureDecoder.cpp
|
||||
Src/VertexLoader.cpp
|
||||
Src/VertexLoaderManager.cpp
|
||||
Src/VertexLoader_Color.cpp
|
||||
@ -41,6 +40,14 @@ set(SRCS Src/BPFunctions.cpp
|
||||
Src/memcpy_amd.cpp)
|
||||
|
||||
set(LIBS core)
|
||||
|
||||
if(NOT _M_GENERIC)
|
||||
set(SRCS ${SRCS} Src/x64TextureDecoder.cpp
|
||||
Src/x64DLCache.cpp)
|
||||
else()
|
||||
set(SRCS ${SRCS} Src/GenericTextureDecoder.cpp
|
||||
Src/GenericDLCache.cpp)
|
||||
endif()
|
||||
if(NOT ${CL} STREQUAL CL-NOTFOUND)
|
||||
list(APPEND LIBS ${CL})
|
||||
endif()
|
||||
|
@ -157,9 +157,21 @@ void AVIDump::Stop()
|
||||
NOTICE_LOG(VIDEO, "Stop");
|
||||
}
|
||||
|
||||
void AVIDump::AddFrame(char *data)
|
||||
void AVIDump::AddFrame(const u8* data, int w, int h)
|
||||
{
|
||||
AVIStreamWrite(m_streamCompressed, ++m_frameCount, 1, (LPVOID) data, m_bitmap.biSizeImage, AVIIF_KEYFRAME, NULL, &m_byteBuffer);
|
||||
static bool shown_error = false;
|
||||
if ((w != m_bitmap.biWidth || h != m_bitmap.biHeight) && !shown_error)
|
||||
{
|
||||
PanicAlert("You have resized the window while dumping frames.\n"
|
||||
"Nothing sane can be done to handle this.\n"
|
||||
"Your video will likely be broken.");
|
||||
shown_error = true;
|
||||
|
||||
m_bitmap.biWidth = w;
|
||||
m_bitmap.biHeight = h;
|
||||
}
|
||||
|
||||
AVIStreamWrite(m_streamCompressed, ++m_frameCount, 1, const_cast<u8*>(data), m_bitmap.biSizeImage, AVIIF_KEYFRAME, NULL, &m_byteBuffer);
|
||||
m_totalBytes += m_byteBuffer;
|
||||
// Close the recording if the file is more than 2gb
|
||||
// VfW can't properly save files over 2gb in size, but can keep writing to them up to 4gb.
|
||||
@ -298,9 +310,9 @@ bool AVIDump::CreateFile()
|
||||
return true;
|
||||
}
|
||||
|
||||
void AVIDump::AddFrame(uint8_t *data, int width, int height)
|
||||
void AVIDump::AddFrame(const u8* data, int width, int height)
|
||||
{
|
||||
avpicture_fill((AVPicture *)s_BGRFrame, data, PIX_FMT_BGR24, width, height);
|
||||
avpicture_fill((AVPicture *)s_BGRFrame, const_cast<u8*>(data), PIX_FMT_BGR24, width, height);
|
||||
|
||||
// Convert image from BGR24 to desired pixel format, and scale to initial
|
||||
// width and height
|
||||
|
@ -24,6 +24,8 @@
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#include "CommonTypes.h"
|
||||
|
||||
class AVIDump
|
||||
{
|
||||
private:
|
||||
@ -36,11 +38,11 @@ class AVIDump
|
||||
public:
|
||||
#ifdef _WIN32
|
||||
static bool Start(HWND hWnd, int w, int h);
|
||||
static void AddFrame(char *data);
|
||||
#else
|
||||
static bool Start(int w, int h);
|
||||
static void AddFrame(uint8_t *data, int width, int height);
|
||||
#endif
|
||||
static void AddFrame(const u8* data, int width, int height);
|
||||
|
||||
static void Stop();
|
||||
};
|
||||
|
||||
|
@ -62,7 +62,7 @@
|
||||
#define BPMEM_COPYFILTER1 0x54
|
||||
#define BPMEM_CLEARBBOX1 0x55
|
||||
#define BPMEM_CLEARBBOX2 0x56
|
||||
#define BPMEM_UNKNOWN_57 0x57
|
||||
#define BPMEM_CLEAR_PIXEL_PERF 0x57
|
||||
#define BPMEM_REVBITS 0x58
|
||||
#define BPMEM_SCISSOROFFSET 0x59
|
||||
#define BPMEM_PRELOAD_ADDR 0x60
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "VertexShaderManager.h"
|
||||
#include "Thread.h"
|
||||
#include "HW/Memmap.h"
|
||||
#include "PerfQueryBase.h"
|
||||
|
||||
using namespace BPFunctions;
|
||||
|
||||
@ -62,7 +63,6 @@ void RenderToXFB(const BPCmd &bp, const EFBRectangle &rc, float yScale, float xf
|
||||
{
|
||||
Renderer::RenderToXFB(xfbAddr, dstWidth, dstHeight, rc, gamma);
|
||||
}
|
||||
|
||||
void BPWritten(const BPCmd& bp)
|
||||
{
|
||||
/*
|
||||
@ -144,7 +144,8 @@ void BPWritten(const BPCmd& bp)
|
||||
|| bp.address == BPMEM_LOADTLUT0
|
||||
|| bp.address == BPMEM_LOADTLUT1
|
||||
|| bp.address == BPMEM_TEXINVALIDATE
|
||||
|| bp.address == BPMEM_PRELOAD_MODE))
|
||||
|| bp.address == BPMEM_PRELOAD_MODE
|
||||
|| bp.address == BPMEM_CLEAR_PIXEL_PERF))
|
||||
{
|
||||
return;
|
||||
}
|
||||
@ -488,9 +489,10 @@ void BPWritten(const BPCmd& bp)
|
||||
case BPMEM_IND_IMASK: // Index Mask ?
|
||||
case BPMEM_REVBITS: // Always set to 0x0F when GX_InitRevBits() is called.
|
||||
break;
|
||||
|
||||
case BPMEM_UNKNOWN_57: // Sunshine alternates this register between values 0x000 and 0xAAA
|
||||
DEBUG_LOG(VIDEO, "Unknown BP Reg 0x57: %08x", bp.newvalue);
|
||||
|
||||
case BPMEM_CLEAR_PIXEL_PERF:
|
||||
// GXClearPixMetric writes 0xAAA here, Sunshine alternates this register between values 0x000 and 0xAAA
|
||||
g_perf_query->ResetQuery();
|
||||
break;
|
||||
|
||||
case BPMEM_PRELOAD_ADDR:
|
||||
|
@ -20,6 +20,8 @@
|
||||
#ifndef _DATAREADER_H
|
||||
#define _DATAREADER_H
|
||||
|
||||
#include "VertexManagerBase.h"
|
||||
|
||||
extern u8* g_pVideoData;
|
||||
|
||||
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
|
||||
@ -31,43 +33,63 @@ __forceinline void DataSkip(u32 skip)
|
||||
g_pVideoData += skip;
|
||||
}
|
||||
|
||||
// probably unnecessary
|
||||
// Advances the global video data pointer g_pVideoData by `count` bytes, where
// `count` is a compile-time constant. No bounds checking is performed.
template <int count>
|
||||
__forceinline void DataSkip()
|
||||
{
|
||||
g_pVideoData += count;
|
||||
}
|
||||
|
||||
// Reads a value of type T located `_uOffset` bytes past g_pVideoData without
// advancing the pointer. The loaded value is passed through
// Common::FromBigEndian (presumably a big-endian -> host byte-order
// conversion; confirm against its definition) and returned.
// NOTE(review): the load goes through reinterpret_cast<T*> on a u8*, so it
// relies on the target tolerating unaligned reads at this address.
template <typename T>
|
||||
__forceinline T DataPeek(int _uOffset)
|
||||
{
|
||||
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(g_pVideoData + _uOffset));
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO: kill these
|
||||
__forceinline u8 DataPeek8(int _uOffset)
|
||||
{
|
||||
return g_pVideoData[_uOffset];
|
||||
return DataPeek<u8>(_uOffset);
|
||||
}
|
||||
|
||||
__forceinline u16 DataPeek16(int _uOffset)
|
||||
{
|
||||
return Common::swap16(*(u16*)&g_pVideoData[_uOffset]);
|
||||
return DataPeek<u16>(_uOffset);
|
||||
}
|
||||
|
||||
__forceinline u32 DataPeek32(int _uOffset)
|
||||
{
|
||||
return Common::swap32(*(u32*)&g_pVideoData[_uOffset]);
|
||||
return DataPeek<u32>(_uOffset);
|
||||
}
|
||||
|
||||
// Reads a value of type T at the current g_pVideoData position (via
// DataPeek<T>(0)) and then advances g_pVideoData by sizeof(T) bytes.
// Returns the value produced by DataPeek.
template <typename T>
|
||||
__forceinline T DataRead()
|
||||
{
|
||||
auto const result = DataPeek<T>(0);
|
||||
DataSkip<sizeof(T)>();
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO: kill these
|
||||
__forceinline u8 DataReadU8()
|
||||
{
|
||||
return *g_pVideoData++;
|
||||
return DataRead<u8>();
|
||||
}
|
||||
|
||||
__forceinline s8 DataReadS8()
|
||||
{
|
||||
return (s8)(*g_pVideoData++);
|
||||
return DataRead<s8>();
|
||||
}
|
||||
|
||||
__forceinline u16 DataReadU16()
|
||||
{
|
||||
u16 tmp = Common::swap16(*(u16*)g_pVideoData);
|
||||
g_pVideoData += 2;
|
||||
return tmp;
|
||||
return DataRead<u16>();
|
||||
}
|
||||
|
||||
__forceinline u32 DataReadU32()
|
||||
{
|
||||
u32 tmp = Common::swap32(*(u32*)g_pVideoData);
|
||||
g_pVideoData += 4;
|
||||
return tmp;
|
||||
return DataRead<u32>();
|
||||
}
|
||||
|
||||
typedef void (*DataReadU32xNfunc)(u32 *buf);
|
||||
@ -120,58 +142,16 @@ __forceinline u32 DataReadU32Unswapped()
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
__forceinline T DataRead()
|
||||
{
|
||||
T tmp = *(T*)g_pVideoData;
|
||||
g_pVideoData += sizeof(T);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
__forceinline u16 DataRead()
|
||||
{
|
||||
u16 tmp = Common::swap16(*(u16*)g_pVideoData);
|
||||
g_pVideoData += 2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
__forceinline s16 DataRead()
|
||||
{
|
||||
s16 tmp = (s16)Common::swap16(*(u16*)g_pVideoData);
|
||||
g_pVideoData += 2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
__forceinline u32 DataRead()
|
||||
{
|
||||
u32 tmp = (u32)Common::swap32(*(u32*)g_pVideoData);
|
||||
g_pVideoData += 4;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
__forceinline s32 DataRead()
|
||||
{
|
||||
s32 tmp = (s32)Common::swap32(*(u32*)g_pVideoData);
|
||||
g_pVideoData += 4;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
__forceinline float DataReadF32()
|
||||
{
|
||||
union {u32 i; float f;} temp;
|
||||
temp.i = Common::swap32(*(u32*)g_pVideoData);
|
||||
g_pVideoData += 4;
|
||||
float tmp = temp.f;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
__forceinline u8* DataGetPosition()
|
||||
{
|
||||
return g_pVideoData;
|
||||
}
|
||||
|
||||
// Stores `data` at VertexManager::s_pCurBufferPointer and advances that
// pointer by sizeof(T) bytes. The value is written as-is (no byte swapping is
// done here) and no bounds checking is performed.
template <typename T>
|
||||
__forceinline void DataWrite(T data)
|
||||
{
|
||||
*(T*)VertexManager::s_pCurBufferPointer = data;
|
||||
VertexManager::s_pCurBufferPointer += sizeof(T);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -205,10 +205,6 @@ LRESULT CALLBACK WndProc( HWND hWnd, UINT iMsg, WPARAM wParam, LPARAM lParam )
|
||||
OnKeyDown(lParam);
|
||||
FreeLookInput((u32)wParam, lParam);
|
||||
}
|
||||
else if (wParam == WIIMOTE_DISCONNECT)
|
||||
{
|
||||
PostMessage(m_hParent, WM_USER, wParam, lParam);
|
||||
}
|
||||
break;
|
||||
|
||||
// Called when a screensaver wants to show up while this window is active
|
||||
|
@ -222,11 +222,11 @@ void RunGpu()
|
||||
{
|
||||
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
|
||||
|
||||
SaveSSEState();
|
||||
LoadDefaultSSEState();
|
||||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
ReadDataFromFifo(uData, 32);
|
||||
OpcodeDecoder_Run(g_bSkipCurrentFrame);
|
||||
LoadSSEState();
|
||||
FPURoundMode::LoadSIMDState();
|
||||
|
||||
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
|
||||
|
||||
|
52
Source/Core/VideoCommon/Src/GenericDLCache.cpp
Normal file
52
Source/Core/VideoCommon/Src/GenericDLCache.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
// Copyright (C) 2003-2009 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// TODO: Handle cache-is-full condition :p
|
||||
|
||||
|
||||
#include "Common.h"
|
||||
#include "DLCache.h"
|
||||
|
||||
// Generic implementation of the DLCache interface: every entry point below is
// an intentionally empty stub, so this translation unit performs no caching.
namespace DLCache
|
||||
{
|
||||
|
||||
// No-op: there is no state to set up in this implementation.
void Init()
|
||||
{
|
||||
}
|
||||
|
||||
// No-op: there is no state to tear down in this implementation.
void Shutdown()
|
||||
{
|
||||
}
|
||||
|
||||
// No-op: nothing is ever cached, so there is nothing to clear.
void Clear()
|
||||
{
|
||||
}
|
||||
|
||||
// No-op: nothing is ever cached, so there is nothing to clean up.
void ProgressiveCleanup()
|
||||
{
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// NOTE - outside the namespace on purpose.
|
||||
// Generic stub: ignores `address` and `size` and always returns false.
// NOTE(review): false presumably tells the caller that the display list was
// not handled by the cache - confirm against the call site.
bool HandleDisplayList(u32 address, u32 size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Generic stub: intentionally does nothing.
void IncrementCheckContextId()
|
||||
{
|
||||
}
|
2216
Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp
Normal file
2216
Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -69,7 +69,8 @@ bool SaveTGA(const char* filename, int width, int height, void* pdata)
|
||||
|
||||
bool SaveData(const char* filename, const char* data)
|
||||
{
|
||||
std::ofstream f(filename, std::ios::binary);
|
||||
std::ofstream f;
|
||||
OpenFStream(f, filename, std::ios::binary);
|
||||
f << data;
|
||||
|
||||
return true;
|
||||
|
@ -15,6 +15,9 @@
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "Common.h"
|
||||
#include "IndexGenerator.h"
|
||||
|
||||
/*
|
||||
@ -27,24 +30,18 @@ QUAD simulator
|
||||
*/
|
||||
|
||||
//Init
|
||||
u16 *IndexGenerator::Tptr = 0;
|
||||
u16 *IndexGenerator::BASETptr = 0;
|
||||
u16 *IndexGenerator::Lptr = 0;
|
||||
u16 *IndexGenerator::BASELptr = 0;
|
||||
u16 *IndexGenerator::Pptr = 0;
|
||||
u16 *IndexGenerator::BASEPptr = 0;
|
||||
int IndexGenerator::numT = 0;
|
||||
int IndexGenerator::numL = 0;
|
||||
int IndexGenerator::numP = 0;
|
||||
int IndexGenerator::index = 0;
|
||||
int IndexGenerator::Tadds = 0;
|
||||
int IndexGenerator::Ladds = 0;
|
||||
int IndexGenerator::Padds = 0;
|
||||
IndexGenerator::IndexPrimitiveType IndexGenerator::LastTPrimitive = Prim_None;
|
||||
IndexGenerator::IndexPrimitiveType IndexGenerator::LastLPrimitive = Prim_None;
|
||||
bool IndexGenerator::used = false;
|
||||
u16 *IndexGenerator::Tptr;
|
||||
u16 *IndexGenerator::BASETptr;
|
||||
u16 *IndexGenerator::Lptr;
|
||||
u16 *IndexGenerator::BASELptr;
|
||||
u16 *IndexGenerator::Pptr;
|
||||
u16 *IndexGenerator::BASEPptr;
|
||||
u32 IndexGenerator::numT;
|
||||
u32 IndexGenerator::numL;
|
||||
u32 IndexGenerator::numP;
|
||||
u32 IndexGenerator::index;
|
||||
|
||||
void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr)
|
||||
void IndexGenerator::Start(u16* Triangleptr, u16* Lineptr, u16* Pointptr)
|
||||
{
|
||||
Tptr = Triangleptr;
|
||||
Lptr = Lineptr;
|
||||
@ -56,288 +53,116 @@ void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr)
|
||||
numT = 0;
|
||||
numL = 0;
|
||||
numP = 0;
|
||||
Tadds = 0;
|
||||
Ladds = 0;
|
||||
Padds = 0;
|
||||
LastTPrimitive = Prim_None;
|
||||
LastLPrimitive = Prim_None;
|
||||
}
|
||||
|
||||
void IndexGenerator::AddIndices(int primitive, u32 numVerts)
|
||||
{
|
||||
//switch (primitive)
|
||||
//{
|
||||
//case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVerts); break;
|
||||
//case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVerts); break;
|
||||
//case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVerts); break;
|
||||
//case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVerts); break;
|
||||
//case GX_DRAW_LINES: IndexGenerator::AddLineList(numVerts); break;
|
||||
//case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVerts); break;
|
||||
//case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVerts); break;
|
||||
//}
|
||||
|
||||
static void (*const primitive_table[])(u32) =
|
||||
{
|
||||
IndexGenerator::AddQuads,
|
||||
NULL,
|
||||
IndexGenerator::AddList,
|
||||
IndexGenerator::AddStrip,
|
||||
IndexGenerator::AddFan,
|
||||
IndexGenerator::AddLineList,
|
||||
IndexGenerator::AddLineStrip,
|
||||
IndexGenerator::AddPoints,
|
||||
};
|
||||
|
||||
primitive_table[primitive](numVerts);
|
||||
index += numVerts;
|
||||
}
|
||||
|
||||
// Triangles
|
||||
void IndexGenerator::AddList(int numVerts)
|
||||
__forceinline void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3)
|
||||
{
|
||||
//if we have no vertices return
|
||||
if(numVerts <= 0) return;
|
||||
int numTris = numVerts / 3;
|
||||
if (!numTris)
|
||||
{
|
||||
//if we have less than 3 verts
|
||||
if(numVerts == 1)
|
||||
{
|
||||
// discard
|
||||
index++;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
//we have two verts render a degenerated triangle
|
||||
numTris = 1;
|
||||
*Tptr++ = index;
|
||||
*Tptr++ = index+1;
|
||||
*Tptr++ = index;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < numTris; i++)
|
||||
{
|
||||
*Tptr++ = index+i*3;
|
||||
*Tptr++ = index+i*3+1;
|
||||
*Tptr++ = index+i*3+2;
|
||||
}
|
||||
int baseRemainingverts = numVerts - numVerts % 3;
|
||||
switch (numVerts % 3)
|
||||
{
|
||||
case 2:
|
||||
//whe have 2 remaining verts use strip method
|
||||
*Tptr++ = index + baseRemainingverts - 1;
|
||||
*Tptr++ = index + baseRemainingverts;
|
||||
*Tptr++ = index + baseRemainingverts + 1;
|
||||
numTris++;
|
||||
break;
|
||||
case 1:
|
||||
//whe have 1 remaining verts use strip method this is only a conjeture
|
||||
*Tptr++ = index + baseRemainingverts - 2;
|
||||
*Tptr++ = index + baseRemainingverts - 1;
|
||||
*Tptr++ = index + baseRemainingverts;
|
||||
numTris++;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
};
|
||||
}
|
||||
index += numVerts;
|
||||
numT += numTris;
|
||||
Tadds++;
|
||||
LastTPrimitive = Prim_List;
|
||||
*Tptr++ = index1;
|
||||
*Tptr++ = index2;
|
||||
*Tptr++ = index3;
|
||||
|
||||
++numT;
|
||||
}
|
||||
|
||||
void IndexGenerator::AddStrip(int numVerts)
|
||||
void IndexGenerator::AddList(u32 const numVerts)
|
||||
{
|
||||
if(numVerts <= 0) return;
|
||||
int numTris = numVerts - 2;
|
||||
if (numTris < 1)
|
||||
auto const numTris = numVerts / 3;
|
||||
for (u32 i = 0; i != numTris; ++i)
|
||||
{
|
||||
//if we have less than 3 verts
|
||||
if(numVerts == 1)
|
||||
{
|
||||
// discard
|
||||
index++;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
//we have two verts render a degenerated triangle
|
||||
numTris = 1;
|
||||
*Tptr++ = index;
|
||||
*Tptr++ = index+1;
|
||||
*Tptr++ = index;
|
||||
}
|
||||
WriteTriangle(index + i * 3, index + i * 3 + 1, index + i * 3 + 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
bool wind = false;
|
||||
for (int i = 0; i < numTris; i++)
|
||||
{
|
||||
*Tptr++ = index+i;
|
||||
*Tptr++ = index+i+(wind?2:1);
|
||||
*Tptr++ = index+i+(wind?1:2);
|
||||
wind = !wind;
|
||||
}
|
||||
}
|
||||
index += numVerts;
|
||||
numT += numTris;
|
||||
Tadds++;
|
||||
LastTPrimitive = Prim_Strip;
|
||||
}
|
||||
void IndexGenerator::AddFan(int numVerts)
|
||||
{
|
||||
if(numVerts <= 0) return;
|
||||
int numTris = numVerts - 2;
|
||||
if (numTris < 1)
|
||||
{
|
||||
//if we have less than 3 verts
|
||||
if(numVerts == 1)
|
||||
{
|
||||
//Discard
|
||||
index++;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
//we have two verts render a degenerated triangle
|
||||
numTris = 1;
|
||||
*Tptr++ = index;
|
||||
*Tptr++ = index+1;
|
||||
*Tptr++ = index;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < numTris; i++)
|
||||
{
|
||||
*Tptr++ = index;
|
||||
*Tptr++ = index+i+1;
|
||||
*Tptr++ = index+i+2;
|
||||
}
|
||||
}
|
||||
index += numVerts;
|
||||
numT += numTris;
|
||||
Tadds++;
|
||||
LastTPrimitive = Prim_Fan;
|
||||
}
|
||||
|
||||
void IndexGenerator::AddQuads(int numVerts)
|
||||
void IndexGenerator::AddStrip(u32 const numVerts)
|
||||
{
|
||||
if(numVerts <= 0) return;
|
||||
int numTris = (numVerts/4)*2;
|
||||
if (numTris == 0)
|
||||
bool wind = false;
|
||||
for (u32 i = 2; i < numVerts; ++i)
|
||||
{
|
||||
//if we have less than 3 verts
|
||||
if(numVerts == 1)
|
||||
{
|
||||
//discard
|
||||
index++;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(numVerts == 2)
|
||||
{
|
||||
//we have two verts render a degenerated triangle
|
||||
numTris = 1;
|
||||
*Tptr++ = index;
|
||||
*Tptr++ = index + 1;
|
||||
*Tptr++ = index;
|
||||
}
|
||||
else
|
||||
{
|
||||
//we have 3 verts render a full triangle
|
||||
numTris = 1;
|
||||
*Tptr++ = index;
|
||||
*Tptr++ = index + 1;
|
||||
*Tptr++ = index + 2;
|
||||
}
|
||||
}
|
||||
WriteTriangle(
|
||||
index + i - 2,
|
||||
index + i - !wind,
|
||||
index + i - wind);
|
||||
|
||||
wind ^= true;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < numTris / 2; i++)
|
||||
{
|
||||
*Tptr++ = index+i*4;
|
||||
*Tptr++ = index+i*4+1;
|
||||
*Tptr++ = index+i*4+2;
|
||||
*Tptr++ = index+i*4;
|
||||
*Tptr++ = index+i*4+2;
|
||||
*Tptr++ = index+i*4+3;
|
||||
}
|
||||
int baseRemainingverts = numVerts - numVerts % 4;
|
||||
switch (numVerts % 4)
|
||||
{
|
||||
case 3:
|
||||
//whe have 3 remaining verts use strip method
|
||||
*Tptr++ = index + baseRemainingverts;
|
||||
*Tptr++ = index + baseRemainingverts + 1;
|
||||
*Tptr++ = index + baseRemainingverts + 2;
|
||||
numTris++;
|
||||
break;
|
||||
case 2:
|
||||
//whe have 2 remaining verts use strip method
|
||||
*Tptr++ = index + baseRemainingverts - 1;
|
||||
*Tptr++ = index + baseRemainingverts;
|
||||
*Tptr++ = index + baseRemainingverts + 1;
|
||||
numTris++;
|
||||
break;
|
||||
case 1:
|
||||
//whe have 1 remaining verts use strip method this is only a conjeture
|
||||
*Tptr++ = index + baseRemainingverts - 2;
|
||||
*Tptr++ = index + baseRemainingverts - 1;
|
||||
*Tptr++ = index + baseRemainingverts;
|
||||
numTris++;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
};
|
||||
}
|
||||
index += numVerts;
|
||||
numT += numTris;
|
||||
Tadds++;
|
||||
LastTPrimitive = Prim_List;
|
||||
}
|
||||
|
||||
|
||||
//Lines
|
||||
void IndexGenerator::AddLineList(int numVerts)
|
||||
void IndexGenerator::AddFan(u32 numVerts)
|
||||
{
|
||||
if(numVerts <= 0) return;
|
||||
int numLines = numVerts / 2;
|
||||
if (!numLines)
|
||||
for (u32 i = 2; i < numVerts; ++i)
|
||||
{
|
||||
//Discard
|
||||
index++;
|
||||
return;
|
||||
WriteTriangle(index, index + i - 1, index + i);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < numLines; i++)
|
||||
{
|
||||
*Lptr++ = index+i*2;
|
||||
*Lptr++ = index+i*2+1;
|
||||
}
|
||||
if((numVerts & 1) != 0)
|
||||
{
|
||||
//use line strip for remaining vert
|
||||
*Lptr++ = index + numLines * 2 - 1;
|
||||
*Lptr++ = index + numLines * 2;
|
||||
}
|
||||
}
|
||||
index += numVerts;
|
||||
numL += numLines;
|
||||
Ladds++;
|
||||
LastLPrimitive = Prim_List;
|
||||
}
|
||||
|
||||
void IndexGenerator::AddLineStrip(int numVerts)
|
||||
void IndexGenerator::AddQuads(u32 numVerts)
|
||||
{
|
||||
int numLines = numVerts - 1;
|
||||
if (numLines <= 0)
|
||||
auto const numQuads = numVerts / 4;
|
||||
for (u32 i = 0; i != numQuads; ++i)
|
||||
{
|
||||
if(numVerts == 1)
|
||||
{
|
||||
index++;
|
||||
}
|
||||
return;
|
||||
WriteTriangle(index + i * 4, index + i * 4 + 1, index + i * 4 + 2);
|
||||
WriteTriangle(index + i * 4, index + i * 4 + 2, index + i * 4 + 3);
|
||||
}
|
||||
for (int i = 0; i < numLines; i++)
|
||||
{
|
||||
*Lptr++ = index+i;
|
||||
*Lptr++ = index+i+1;
|
||||
}
|
||||
index += numVerts;
|
||||
numL += numLines;
|
||||
Ladds++;
|
||||
LastLPrimitive = Prim_Strip;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//Points
|
||||
void IndexGenerator::AddPoints(int numVerts)
|
||||
// Lines
|
||||
void IndexGenerator::AddLineList(u32 numVerts)
|
||||
{
|
||||
for (int i = 0; i < numVerts; i++)
|
||||
auto const numLines = numVerts / 2;
|
||||
for (u32 i = 0; i != numLines; ++i)
|
||||
{
|
||||
*Pptr++ = index+i;
|
||||
*Lptr++ = index + i * 2;
|
||||
*Lptr++ = index + i * 2 + 1;
|
||||
++numL;
|
||||
}
|
||||
}
|
||||
|
||||
void IndexGenerator::AddLineStrip(u32 numVerts)
|
||||
{
|
||||
for (u32 i = 1; i < numVerts; ++i)
|
||||
{
|
||||
*Lptr++ = index + i - 1;
|
||||
*Lptr++ = index + i;
|
||||
++numL;
|
||||
}
|
||||
}
|
||||
|
||||
// Points
|
||||
void IndexGenerator::AddPoints(u32 numVerts)
|
||||
{
|
||||
for (u32 i = 0; i != numVerts; ++i)
|
||||
{
|
||||
*Pptr++ = index + i;
|
||||
++numP;
|
||||
}
|
||||
index += numVerts;
|
||||
numP += numVerts;
|
||||
Padds++;
|
||||
}
|
||||
|
@ -25,53 +25,58 @@
|
||||
class IndexGenerator
|
||||
{
|
||||
public:
|
||||
//Init
|
||||
// Init
|
||||
static void Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr);
|
||||
//Triangles
|
||||
static void AddList(int numVerts);
|
||||
static void AddStrip(int numVerts);
|
||||
static void AddFan(int numVerts);
|
||||
static void AddQuads(int numVerts);
|
||||
//Lines
|
||||
static void AddLineList(int numVerts);
|
||||
static void AddLineStrip(int numVerts);
|
||||
//Points
|
||||
static void AddPoints(int numVerts);
|
||||
//Interface
|
||||
static int GetNumTriangles() {used = true; return numT;}
|
||||
static int GetNumLines() {used = true;return numL;}
|
||||
static int GetNumPoints() {used = true;return numP;}
|
||||
static int GetNumVerts() {return index;} //returns numprimitives
|
||||
static int GetNumAdds() {return Tadds + Ladds + Padds;}
|
||||
static int GetTriangleindexLen() {return (int)(Tptr - BASETptr);}
|
||||
static int GetLineindexLen() {return (int)(Lptr - BASELptr);}
|
||||
static int GetPointindexLen() {return (int)(Pptr - BASEPptr);}
|
||||
|
||||
|
||||
static void AddIndices(int primitive, u32 numVertices);
|
||||
|
||||
// Interface
|
||||
static u32 GetNumTriangles() {return numT;}
|
||||
static u32 GetNumLines() {return numL;}
|
||||
static u32 GetNumPoints() {return numP;}
|
||||
|
||||
// returns numprimitives
|
||||
static u32 GetNumVerts() {return index;}
|
||||
|
||||
static u32 GetTriangleindexLen() {return (u32)(Tptr - BASETptr);}
|
||||
static u32 GetLineindexLen() {return (u32)(Lptr - BASELptr);}
|
||||
static u32 GetPointindexLen() {return (u32)(Pptr - BASEPptr);}
|
||||
/*
|
||||
enum IndexPrimitiveType
|
||||
{
|
||||
Prim_None = 0,
|
||||
Prim_List,
|
||||
Prim_Strip,
|
||||
Prim_Fan
|
||||
} ;
|
||||
};
|
||||
*/
|
||||
private:
|
||||
// Triangles
|
||||
static void AddList(u32 numVerts);
|
||||
static void AddStrip(u32 numVerts);
|
||||
static void AddFan(u32 numVerts);
|
||||
static void AddQuads(u32 numVerts);
|
||||
|
||||
// Lines
|
||||
static void AddLineList(u32 numVerts);
|
||||
static void AddLineStrip(u32 numVerts);
|
||||
|
||||
// Points
|
||||
static void AddPoints(u32 numVerts);
|
||||
|
||||
static void WriteTriangle(u32 index1, u32 index2, u32 index3);
|
||||
|
||||
static u16 *Tptr;
|
||||
static u16 *BASETptr;
|
||||
static u16 *Lptr;
|
||||
static u16 *BASELptr;
|
||||
static u16 *Pptr;
|
||||
static u16 *BASEPptr;
|
||||
static int numT;
|
||||
static int numL;
|
||||
static int numP;
|
||||
static int index;
|
||||
static int Tadds;
|
||||
static int Ladds;
|
||||
static int Padds;
|
||||
static IndexPrimitiveType LastTPrimitive;
|
||||
static IndexPrimitiveType LastLPrimitive;
|
||||
static bool used;
|
||||
|
||||
// TODO: redundant variables
|
||||
static u32 numT;
|
||||
static u32 numL;
|
||||
static u32 numP;
|
||||
static u32 index;
|
||||
};
|
||||
|
||||
#endif // _INDEXGENERATOR_H
|
||||
|
@ -21,6 +21,10 @@ volatile u32 s_swapRequested = false;
|
||||
u32 s_efbAccessRequested = false;
|
||||
volatile u32 s_FifoShuttingDown = false;
|
||||
|
||||
std::condition_variable s_perf_query_cond;
|
||||
std::mutex s_perf_query_lock;
|
||||
static volatile bool s_perf_query_requested;
|
||||
|
||||
static volatile struct
|
||||
{
|
||||
u32 xfbAddr;
|
||||
@ -169,6 +173,43 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Wait predicate for s_perf_query_cond (see Video_GetQueryResult).
// Returns true once no perf query request is outstanding, or when
// s_FifoShuttingDown is set, in which case the waiter stops waiting even
// though the request flag is still set.
static bool QueryResultIsReady()
|
||||
{
|
||||
return !s_perf_query_requested || s_FifoShuttingDown;
|
||||
}
|
||||
|
||||
// Services a pending perf query request raised by Video_GetQueryResult.
// If s_perf_query_requested is set: flushes the query results, clears the
// flag while holding s_perf_query_lock, then wakes one waiter on
// s_perf_query_cond. Does nothing when no request is pending.
// Invoked from VideoFifo_CheckAsyncRequest.
// NOTE(review): the initial read of s_perf_query_requested happens without
// holding s_perf_query_lock; the flag is a volatile bool, not an atomic.
void VideoFifo_CheckPerfQueryRequest()
|
||||
{
|
||||
if (s_perf_query_requested)
|
||||
{
|
||||
g_perf_query->FlushResults();
|
||||
|
||||
{
|
||||
// Clear the flag under the lock so the waiter's predicate observes it.
std::lock_guard<std::mutex> lk(s_perf_query_lock);
|
||||
s_perf_query_requested = false;
|
||||
}
|
||||
|
||||
s_perf_query_cond.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the current value of the perf query counter selected by `type`.
// If g_perf_query reports unflushed results, they are flushed first:
//  - with bCPUThread enabled, a request flag is raised and this thread blocks
//    on s_perf_query_cond until QueryResultIsReady() holds (the flag is
//    cleared by VideoFifo_CheckPerfQueryRequest, or the FIFO is shutting
//    down);
//  - otherwise FlushResults() is called directly on the calling thread.
// NOTE(review): s_perf_query_requested is set before s_perf_query_lock is
// taken and is a volatile bool rather than an atomic - confirm this is safe
// against the unlocked read in VideoFifo_CheckPerfQueryRequest.
u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type)
|
||||
{
|
||||
// TODO: Is this check sane?
|
||||
if (!g_perf_query->IsFlushed())
|
||||
{
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
|
||||
{
|
||||
s_perf_query_requested = true;
|
||||
std::unique_lock<std::mutex> lk(s_perf_query_lock);
|
||||
s_perf_query_cond.wait(lk, QueryResultIsReady);
|
||||
}
|
||||
else
|
||||
g_perf_query->FlushResults();
|
||||
}
|
||||
|
||||
return g_perf_query->GetQueryResult(type);
|
||||
}
|
||||
|
||||
void VideoBackendHardware::InitializeShared()
|
||||
{
|
||||
@ -176,6 +217,7 @@ void VideoBackendHardware::InitializeShared()
|
||||
|
||||
s_swapRequested = 0;
|
||||
s_efbAccessRequested = 0;
|
||||
s_perf_query_requested = false;
|
||||
s_FifoShuttingDown = 0;
|
||||
memset((void*)&s_beginFieldArgs, 0, sizeof(s_beginFieldArgs));
|
||||
memset(&s_accessEFBArgs, 0, sizeof(s_accessEFBArgs));
|
||||
@ -186,6 +228,11 @@ void VideoBackendHardware::InitializeShared()
|
||||
// Run from the CPU thread
|
||||
void VideoBackendHardware::DoState(PointerWrap& p)
|
||||
{
|
||||
bool software = false;
|
||||
p.Do(software);
|
||||
if (p.GetMode() == PointerWrap::MODE_READ && software == true)
|
||||
// change mode to abort load of incompatible save state.
|
||||
p.SetMode(PointerWrap::MODE_VERIFY);
|
||||
VideoCommon_DoState(p);
|
||||
p.DoMarker("VideoCommon");
|
||||
|
||||
@ -233,6 +280,7 @@ void VideoFifo_CheckAsyncRequest()
|
||||
{
|
||||
VideoFifo_CheckSwapRequest();
|
||||
VideoFifo_CheckEFBAccess();
|
||||
VideoFifo_CheckPerfQueryRequest();
|
||||
}
|
||||
|
||||
void VideoBackendHardware::Video_GatherPipeBursted()
|
||||
|
@ -22,7 +22,7 @@ namespace OSD
|
||||
{
|
||||
|
||||
// On-screen message display
|
||||
void AddMessage(const char* str, u32 ms);
|
||||
void AddMessage(const char* str, u32 ms = 2000);
|
||||
void DrawMessages(); // draw the current messages on the screen. Only call once per frame.
|
||||
void ClearMessages();
|
||||
|
||||
|
3
Source/Core/VideoCommon/Src/PerfQueryBase.cpp
Normal file
3
Source/Core/VideoCommon/Src/PerfQueryBase.cpp
Normal file
@ -0,0 +1,3 @@
|
||||
#include "PerfQueryBase.h"
|
||||
|
||||
PerfQueryBase* g_perf_query = 0;
|
54
Source/Core/VideoCommon/Src/PerfQueryBase.h
Normal file
54
Source/Core/VideoCommon/Src/PerfQueryBase.h
Normal file
@ -0,0 +1,54 @@
|
||||
#ifndef _PERFQUERY_BASE_H_
|
||||
#define _PERFQUERY_BASE_H_
|
||||
|
||||
#include "CommonTypes.h"
|
||||
|
||||
// Selects which performance counter value to fetch; passed to
// PerfQueryBase::GetQueryResult. Values start at 0 and are contiguous.
enum PerfQueryType
|
||||
{
|
||||
PQ_ZCOMP_INPUT_ZCOMPLOC = 0,
|
||||
PQ_ZCOMP_OUTPUT_ZCOMPLOC,
|
||||
PQ_ZCOMP_INPUT,
|
||||
PQ_ZCOMP_OUTPUT,
|
||||
PQ_BLEND_INPUT,
|
||||
PQ_EFB_COPY_CLOCKS,
|
||||
// Last enumerator; presumably a count of the query types above rather than a real query.
PQ_NUM_MEMBERS
|
||||
};
|
||||
|
||||
// Selects a group of counters to start or stop measuring; passed to
// PerfQueryBase::EnableQuery and PerfQueryBase::DisableQuery.
enum PerfQueryGroup
|
||||
{
|
||||
PQG_ZCOMP_ZCOMPLOC,
|
||||
PQG_ZCOMP,
|
||||
PQG_EFB_COPY_CLOCKS,
|
||||
// Last enumerator; presumably a count of the groups above rather than a real group.
PQG_NUM_MEMBERS,
|
||||
};
|
||||
|
||||
// Interface for performance queries. Every virtual method has a default
// implementation that does nothing: GetQueryResult returns 0 and IsFlushed
// returns true, so the base class can be used as-is when no real query
// support is available. Derived classes override the methods they support.
class PerfQueryBase
|
||||
{
|
||||
public:
|
||||
PerfQueryBase() {};
|
||||
// Virtual so that derived classes are destroyed correctly through a base pointer.
virtual ~PerfQueryBase() {}
|
||||
|
||||
// Begin querying the specified value for the following host GPU commands
|
||||
virtual void EnableQuery(PerfQueryGroup type) {}
|
||||
|
||||
// Stop querying the specified value for the following host GPU commands
|
||||
virtual void DisableQuery(PerfQueryGroup type) {}
|
||||
|
||||
// Reset query counters to zero and drop any pending queries
|
||||
virtual void ResetQuery() {}
|
||||
|
||||
// Return the measured value for the specified query type
|
||||
// NOTE: Called from CPU thread
|
||||
virtual u32 GetQueryResult(PerfQueryType type) { return 0; }
|
||||
|
||||
// Request the value of any pending queries - causes a pipeline flush and thus should be used carefully!
|
||||
virtual void FlushResults() {}
|
||||
|
||||
// True if there are no further pending query results
|
||||
// NOTE: Called from CPU thread
|
||||
virtual bool IsFlushed() const { return true; }
|
||||
};
|
||||
|
||||
extern PerfQueryBase* g_perf_query;
|
||||
|
||||
#endif // _PERFQUERY_BASE_H_
|
@ -28,10 +28,13 @@
|
||||
#include "ConfigManager.h"
|
||||
|
||||
#include "PixelEngine.h"
|
||||
#include "RenderBase.h"
|
||||
#include "CommandProcessor.h"
|
||||
#include "HW/ProcessorInterface.h"
|
||||
#include "DLCache.h"
|
||||
#include "State.h"
|
||||
#include "PerfQueryBase.h"
|
||||
|
||||
namespace PixelEngine
|
||||
{
|
||||
|
||||
@ -255,23 +258,59 @@ void Read16(u16& _uReturnValue, const u32 _iAddress)
|
||||
break;
|
||||
}
|
||||
|
||||
case PE_PERF_0L:
|
||||
case PE_PERF_0H:
|
||||
case PE_PERF_1L:
|
||||
case PE_PERF_1H:
|
||||
case PE_PERF_2L:
|
||||
case PE_PERF_2H:
|
||||
case PE_PERF_3L:
|
||||
case PE_PERF_3H:
|
||||
case PE_PERF_4L:
|
||||
case PE_PERF_4H:
|
||||
case PE_PERF_5L:
|
||||
case PE_PERF_5H:
|
||||
INFO_LOG(PIXELENGINE, "(r16) perf counter @ %08x", _iAddress);
|
||||
// git r90a2096a24f4 (svn r3663) added the PE_PERF cases, without setting
|
||||
// _uReturnValue to anything, this reverts to the previous behaviour which allows
|
||||
// The timer in SMS:Scrubbing Serena Beach to countdown correctly
|
||||
_uReturnValue = 1;
|
||||
// NOTE(neobrain): only PE_PERF_ZCOMP_OUTPUT is implemented in D3D11, but the other values shouldn't contradict the value of that register (i.e. INPUT registers should always be greater than or equal to their corresponding OUTPUT registers).
|
||||
case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_INPUT_L:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_INPUT_H:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_L:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_H:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_BLEND_INPUT_L:
|
||||
// Super Mario Sunshine uses this register in episode 6 of Sirena Beach:
|
||||
// The amount of remaining goop is determined by checking how many pixels reach the blending stage.
|
||||
// Once this register falls below a particular value (around 0x90), the game regards the challenge finished.
|
||||
// In very old builds, Dolphin only returned 0. That caused the challenge to be immediately finished without any goop being cleaned (the timer just didn't even start counting from 3:00:00).
|
||||
// Later builds returned 1 for the high register. That caused the timer to actually count down, but made the challenge unbeatable because the game always thought you didn't clear any goop at all.
|
||||
// Note that currently this functionality is only implemented in the D3D11 backend.
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_BLEND_INPUT_H:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_EFB_COPY_CLOCKS_L:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_EFB_COPY_CLOCKS) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_EFB_COPY_CLOCKS_H:
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_EFB_COPY_CLOCKS) >> 16;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -36,19 +36,20 @@ enum
|
||||
PE_BBOX_TOP = 0x14, // Flip Top
|
||||
PE_BBOX_BOTTOM = 0x16, // Flip Bottom
|
||||
|
||||
// These have not yet been RE:d. They are the perf counters.
|
||||
PE_PERF_0L = 0x18,
|
||||
PE_PERF_0H = 0x1a,
|
||||
PE_PERF_1L = 0x1c,
|
||||
PE_PERF_1H = 0x1e,
|
||||
PE_PERF_2L = 0x20,
|
||||
PE_PERF_2H = 0x22,
|
||||
PE_PERF_3L = 0x24,
|
||||
PE_PERF_3H = 0x26,
|
||||
PE_PERF_4L = 0x28,
|
||||
PE_PERF_4H = 0x2a,
|
||||
PE_PERF_5L = 0x2c,
|
||||
PE_PERF_5H = 0x2e,
|
||||
// NOTE: Order not verified
|
||||
// These indicate the number of quads that are being used as input/output for each particular stage
|
||||
PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L = 0x18,
|
||||
PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H = 0x1a,
|
||||
PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L = 0x1c,
|
||||
PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H = 0x1e,
|
||||
PE_PERF_ZCOMP_INPUT_L = 0x20,
|
||||
PE_PERF_ZCOMP_INPUT_H = 0x22,
|
||||
PE_PERF_ZCOMP_OUTPUT_L = 0x24,
|
||||
PE_PERF_ZCOMP_OUTPUT_H = 0x26,
|
||||
PE_PERF_BLEND_INPUT_L = 0x28,
|
||||
PE_PERF_BLEND_INPUT_H = 0x2a,
|
||||
PE_PERF_EFB_COPY_CLOCKS_L = 0x2c,
|
||||
PE_PERF_EFB_COPY_CLOCKS_H = 0x2e,
|
||||
};
|
||||
|
||||
namespace PixelEngine
|
||||
|
@ -252,7 +252,8 @@ void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::
|
||||
static int num_failures = 0;
|
||||
char szTemp[MAX_PATH];
|
||||
sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
|
||||
std::ofstream file(szTemp);
|
||||
std::ofstream file;
|
||||
OpenFStream(file, szTemp, std::ios_base::out);
|
||||
file << msg;
|
||||
file << "\n\nOld shader code:\n" << old_code;
|
||||
file << "\n\nNew shader code:\n" << new_code;
|
||||
@ -275,7 +276,7 @@ void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::
|
||||
static void WriteStage(char *&p, int n, API_TYPE ApiType);
|
||||
static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
|
||||
// static void WriteAlphaCompare(char *&p, int num, int comp);
|
||||
static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode);
|
||||
static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool depthTextureEnable);
|
||||
static void WriteFog(char *&p);
|
||||
|
||||
static const char *tevKSelTableC[] = // KCSEL
|
||||
@ -510,6 +511,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
|
||||
BuildSwapModeTable(); // Needed for WriteStage
|
||||
int numStages = bpmem.genMode.numtevstages + 1;
|
||||
int numTexgen = bpmem.genMode.numtexgens;
|
||||
|
||||
bool depthTextureEnable = bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable;
|
||||
|
||||
char *p = text;
|
||||
WRITE(p, "//Pixel Shader for TEV stages\n");
|
||||
@ -599,7 +602,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
|
||||
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
|
||||
WRITE(p, "out float4 ocol1;\n");
|
||||
|
||||
WRITE(p, "float depth;\n");
|
||||
if (depthTextureEnable)
|
||||
WRITE(p, "#define depth gl_FragDepth\n");
|
||||
WRITE(p, "float4 rawpos = gl_FragCoord;\n");
|
||||
|
||||
WRITE(p, "VARYIN float4 colors_02;\n");
|
||||
@ -654,14 +658,14 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
|
||||
{
|
||||
WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n",
|
||||
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "",
|
||||
"\n out float depth : DEPTH,",
|
||||
depthTextureEnable ? "\n out float depth : DEPTH," : "",
|
||||
ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS");
|
||||
}
|
||||
else
|
||||
{
|
||||
WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
|
||||
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
|
||||
"\n out float depth : SV_Depth,");
|
||||
depthTextureEnable ? "\n out float depth : SV_Depth," : "");
|
||||
}
|
||||
|
||||
WRITE(p, " in float4 colors_0 : COLOR0,\n");
|
||||
@ -804,13 +808,18 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
|
||||
|
||||
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
|
||||
if (Pretest == AlphaTest::UNDETERMINED)
|
||||
WriteAlphaTest(p, ApiType, dstAlphaMode);
|
||||
WriteAlphaTest(p, ApiType, dstAlphaMode, depthTextureEnable);
|
||||
|
||||
|
||||
// the screen space depth value = far z + (clip z / clip w) * z range
|
||||
WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
|
||||
|
||||
if(ApiType == API_OPENGL || ApiType == API_D3D11)
|
||||
WRITE(p, "float zCoord = rawpos.z;\n");
|
||||
else
|
||||
// dx9 doesn't support 4 component position, so we have to calculate it again
|
||||
WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
|
||||
|
||||
// Note: depth textures are disabled if early depth test is enabled
|
||||
if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable)
|
||||
if (depthTextureEnable)
|
||||
{
|
||||
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
|
||||
WRITE(p, "zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
|
||||
@ -820,8 +829,9 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
|
||||
WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
|
||||
WRITE(p, "zCoord = frac(zCoord);\n");
|
||||
WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
|
||||
|
||||
WRITE(p, "depth = zCoord;\n");
|
||||
}
|
||||
WRITE(p, "depth = zCoord;\n");
|
||||
|
||||
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
|
||||
WRITE(p, "\tocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n");
|
||||
@ -841,10 +851,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
|
||||
WRITE(p, "\tocol0.a = " I_ALPHA"[0].a;\n");
|
||||
}
|
||||
|
||||
if (ApiType == API_OPENGL)
|
||||
{
|
||||
WRITE(p, "\tgl_FragDepth = depth;\n");
|
||||
}
|
||||
WRITE(p, "}\n");
|
||||
if (text[sizeof(text) - 1] != 0x7C)
|
||||
PanicAlert("PixelShader generator - buffer too small, canary has been eaten!");
|
||||
@ -1236,7 +1242,7 @@ static const char *tevAlphaFunclogicTable[] =
|
||||
" == " // xnor
|
||||
};
|
||||
|
||||
static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode)
|
||||
static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool depthTextureEnable)
|
||||
{
|
||||
static const char *alphaRef[2] =
|
||||
{
|
||||
@ -1260,7 +1266,8 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode
|
||||
WRITE(p, "\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
|
||||
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
|
||||
WRITE(p, "\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
|
||||
WRITE(p, "depth = 1.f;\n");
|
||||
if(depthTextureEnable)
|
||||
WRITE(p, "depth = 1.f;\n");
|
||||
|
||||
// HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
|
||||
// or after texturing and alpha test. PC GPUs have no way to support this
|
||||
|
@ -83,7 +83,9 @@ unsigned int Renderer::efb_scale_denominatorY = 1;
|
||||
unsigned int Renderer::ssaa_multiplier = 1;
|
||||
|
||||
|
||||
Renderer::Renderer() : frame_data(NULL), bLastFrameDumped(false)
|
||||
Renderer::Renderer()
|
||||
: frame_data()
|
||||
, bLastFrameDumped(false)
|
||||
{
|
||||
UpdateActiveConfig();
|
||||
TextureCache::OnConfigChanged(g_ActiveConfig);
|
||||
@ -110,7 +112,6 @@ Renderer::~Renderer()
|
||||
if (pFrameDump.IsOpen())
|
||||
pFrameDump.Close();
|
||||
#endif
|
||||
delete[] frame_data;
|
||||
}
|
||||
|
||||
void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma)
|
||||
|
@ -52,6 +52,15 @@ public:
|
||||
Renderer();
|
||||
virtual ~Renderer();
|
||||
|
||||
enum PixelPerfQuery {
|
||||
PP_ZCOMP_INPUT_ZCOMPLOC,
|
||||
PP_ZCOMP_OUTPUT_ZCOMPLOC,
|
||||
PP_ZCOMP_INPUT,
|
||||
PP_ZCOMP_OUTPUT,
|
||||
PP_BLEND_INPUT,
|
||||
PP_EFB_COPY_CLOCKS
|
||||
};
|
||||
|
||||
virtual void SetColorMask() = 0;
|
||||
virtual void SetBlendMode(bool forceUpdate) = 0;
|
||||
virtual void SetScissorRect(const TargetRectangle& rc) = 0;
|
||||
@ -147,7 +156,7 @@ protected:
|
||||
#else
|
||||
File::IOFile pFrameDump;
|
||||
#endif
|
||||
char* frame_data;
|
||||
std::vector<u8> frame_data;
|
||||
bool bLastFrameDumped;
|
||||
|
||||
// The framebuffer size
|
||||
|
@ -129,7 +129,10 @@ void TextureCache::Cleanup()
|
||||
TexCache::iterator tcend = textures.end();
|
||||
while (iter != tcend)
|
||||
{
|
||||
if (frameCount > TEXTURE_KILL_THRESHOLD + iter->second->frameCount) // TODO: Deleting EFB copies might not be a good idea here...
|
||||
if ( frameCount > TEXTURE_KILL_THRESHOLD + iter->second->frameCount
|
||||
|
||||
// EFB copies living on the host GPU are unrecoverable and thus shouldn't be deleted
|
||||
&& ! iter->second->IsEfbCopy() )
|
||||
{
|
||||
delete iter->second;
|
||||
textures.erase(iter++);
|
||||
@ -318,7 +321,7 @@ static TextureCache::TCacheEntryBase* ReturnEntry(unsigned int stage, TextureCac
|
||||
|
||||
TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
u32 const address, unsigned int width, unsigned int height, int const texformat,
|
||||
unsigned int const tlutaddr, int const tlutfmt, bool const use_mipmaps, unsigned int const maxlevel, bool const from_tmem)
|
||||
unsigned int const tlutaddr, int const tlutfmt, bool const use_mipmaps, unsigned int maxlevel, bool const from_tmem)
|
||||
{
|
||||
if (0 == address)
|
||||
return NULL;
|
||||
@ -345,7 +348,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
full_format = texformat | (tlutfmt << 16);
|
||||
|
||||
const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
|
||||
|
||||
|
||||
const u8* src_data;
|
||||
if (from_tmem)
|
||||
src_data = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE];
|
||||
@ -372,6 +375,11 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
tex_hash ^= tlut_hash;
|
||||
}
|
||||
|
||||
// D3D doesn't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain
|
||||
// e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,1x1, so we limit the mipmap count to 6 there
|
||||
while (g_ActiveConfig.backend_info.bUseMinimalMipCount && max(expandedWidth, expandedHeight) >> maxlevel == 0)
|
||||
--maxlevel;
|
||||
|
||||
TCacheEntryBase *entry = textures[texID];
|
||||
if (entry)
|
||||
{
|
||||
@ -456,7 +464,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
const bool using_custom_lods = using_custom_texture && CheckForCustomTextureLODs(tex_hash, texformat, texLevels);
|
||||
// Only load native mips if their dimensions fit to our virtual texture dimensions
|
||||
const bool use_native_mips = use_mipmaps && !using_custom_lods && (width == nativeW && height == nativeH);
|
||||
texLevels = (use_native_mips || using_custom_lods) ? texLevels : 1;
|
||||
texLevels = (use_native_mips || using_custom_lods) ? texLevels : 1; // TODO: Should be forced to 1 for non-pow2 textures (e.g. efb copies with automatically adjusted IR)
|
||||
|
||||
// create the entry/texture
|
||||
if (NULL == entry)
|
||||
@ -476,15 +484,20 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
// load texture (CreateTexture also loads level 0)
|
||||
entry->Load(width, height, expandedWidth, 0);
|
||||
}
|
||||
|
||||
entry->SetGeneralParameters(address, texture_size, full_format, entry->num_mipmaps);
|
||||
entry->SetDimensions(nativeW, nativeH, width, height);
|
||||
entry->hash = tex_hash;
|
||||
if (entry->IsEfbCopy() && !g_ActiveConfig.bCopyEFBToTexture) entry->type = TCET_EC_DYNAMIC;
|
||||
else entry->type = TCET_NORMAL;
|
||||
|
||||
// load texture
|
||||
entry->Load(stage, width, height, expandedWidth, 0);
|
||||
|
||||
if (entry->IsEfbCopy() && !g_ActiveConfig.bCopyEFBToTexture)
|
||||
entry->type = TCET_EC_DYNAMIC;
|
||||
else
|
||||
entry->type = TCET_NORMAL;
|
||||
|
||||
if (g_ActiveConfig.bDumpTextures && !using_custom_texture)
|
||||
DumpTexture(entry, 0);
|
||||
@ -518,7 +531,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
|
||||
mip_src_data += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
|
||||
|
||||
entry->Load(stage, mip_width, mip_height, expanded_mip_width, level);
|
||||
entry->Load(mip_width, mip_height, expanded_mip_width, level);
|
||||
|
||||
if (g_ActiveConfig.bDumpTextures)
|
||||
DumpTexture(entry, level);
|
||||
@ -532,7 +545,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
unsigned int mip_height = CalculateLevelSize(height, level);
|
||||
|
||||
LoadCustomTexture(tex_hash, texformat, level, mip_width, mip_height);
|
||||
entry->Load(stage, mip_width, mip_height, mip_width, level);
|
||||
entry->Load(mip_width, mip_height, mip_width, level);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ public:
|
||||
virtual void Bind(unsigned int stage) = 0;
|
||||
virtual bool Save(const char filename[], unsigned int level) = 0;
|
||||
|
||||
virtual void Load(unsigned int stage, unsigned int width, unsigned int height,
|
||||
virtual void Load(unsigned int width, unsigned int height,
|
||||
unsigned int expanded_width, unsigned int level) = 0;
|
||||
virtual void FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
|
||||
unsigned int srcFormat, const EFBRectangle& srcRect,
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "MemoryUtil.h"
|
||||
#include "StringUtil.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
#include "x64ABI.h"
|
||||
#include "PixelEngine.h"
|
||||
#include "Host.h"
|
||||
|
||||
@ -43,8 +43,9 @@
|
||||
//BBox
|
||||
#include "XFMemory.h"
|
||||
extern float GC_ALIGNED16(g_fProjectionMatrix[16]);
|
||||
|
||||
#ifndef _M_GENERIC
|
||||
#define USE_JIT
|
||||
#endif
|
||||
|
||||
#define COMPILED_CODE_SIZE 4096
|
||||
|
||||
@ -72,6 +73,10 @@ int colElements[2];
|
||||
float posScale;
|
||||
float tcScale[8];
|
||||
|
||||
// bbox must read vertex position, so convert it to this buffer
|
||||
static float s_bbox_vertex_buffer[3];
|
||||
static u8 *s_bbox_pCurBufferPointer_orig;
|
||||
|
||||
static const float fractionTable[32] = {
|
||||
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
|
||||
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
|
||||
@ -82,8 +87,9 @@ static const float fractionTable[32] = {
|
||||
1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27),
|
||||
1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31),
|
||||
};
|
||||
|
||||
#ifdef USE_JIT
|
||||
using namespace Gen;
|
||||
#endif
|
||||
|
||||
void LOADERDECL PosMtx_ReadDirect_UByte()
|
||||
{
|
||||
@ -93,23 +99,38 @@ void LOADERDECL PosMtx_ReadDirect_UByte()
|
||||
|
||||
void LOADERDECL PosMtx_Write()
|
||||
{
|
||||
*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
|
||||
*VertexManager::s_pCurBufferPointer++ = 0;
|
||||
*VertexManager::s_pCurBufferPointer++ = 0;
|
||||
*VertexManager::s_pCurBufferPointer++ = 0;
|
||||
DataWrite<u8>(s_curposmtx);
|
||||
DataWrite<u8>(0);
|
||||
DataWrite<u8>(0);
|
||||
DataWrite<u8>(0);
|
||||
}
|
||||
|
||||
// Pipeline stage that CompileVertexTranslator emits right before the position
// loader when g_ActiveConfig.bUseBBox is set.
// Redirects the vertex write pointer to s_bbox_vertex_buffer so that the
// position written by the loader can be read back by UpdateBoundingBox(),
// which also restores the original pointer.
void LOADERDECL UpdateBoundingBoxPrepare()
|
||||
{
|
||||
// nothing to redirect unless PixelEngine::bbox_active is set
if (!PixelEngine::bbox_active)
|
||||
return;
|
||||
|
||||
// set our buffer as videodata buffer, so we will get a copy of the vertex positions
|
||||
// this is a hack, but it lets the bbox path reuse the same position conversion function as the non-bbox path
|
||||
s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer;
|
||||
VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer;
|
||||
}
|
||||
|
||||
void LOADERDECL UpdateBoundingBox()
|
||||
{
|
||||
if (!PixelEngine::bbox_active)
|
||||
return;
|
||||
|
||||
// reset videodata pointer
|
||||
VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig;
|
||||
|
||||
// copy the vertex position (3 floats, 12 bytes) into the real vertex buffer
|
||||
memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
|
||||
// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
|
||||
// memory like we might have been with a D3D vertex buffer, this would have been a bad idea.
|
||||
float *data = (float *)(VertexManager::s_pCurBufferPointer - 12);
|
||||
// We must transform the just loaded point by the current world and projection matrix - in software.
|
||||
// Then convert to screen space and update the bounding box.
|
||||
float p[3] = {data[0], data[1], data[2]};
|
||||
float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]};
|
||||
|
||||
const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
|
||||
const float *proj_matrix = &g_fProjectionMatrix[0];
|
||||
@ -147,24 +168,22 @@ void LOADERDECL TexMtx_ReadDirect_UByte()
|
||||
|
||||
void LOADERDECL TexMtx_Write_Float()
|
||||
{
|
||||
*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
|
||||
}
|
||||
|
||||
void LOADERDECL TexMtx_Write_Float2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
DataWrite(0.f);
|
||||
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
|
||||
}
|
||||
|
||||
void LOADERDECL TexMtx_Write_Float4()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = 0;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[3] = 0; // Just to fill out with 0.
|
||||
VertexManager::s_pCurBufferPointer += 16;
|
||||
DataWrite(0.f);
|
||||
DataWrite(0.f);
|
||||
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
|
||||
// Just to fill out with 0.
|
||||
DataWrite(0.f);
|
||||
}
|
||||
|
||||
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
||||
@ -182,14 +201,19 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
||||
m_VtxDesc = vtx_desc;
|
||||
SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
|
||||
|
||||
#ifdef USE_JIT
|
||||
AllocCodeSpace(COMPILED_CODE_SIZE);
|
||||
CompileVertexTranslator();
|
||||
WriteProtect();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
VertexLoader::~VertexLoader()
|
||||
{
|
||||
#ifdef USE_JIT
|
||||
FreeCodeSpace();
|
||||
#endif
|
||||
delete m_NativeFmt;
|
||||
}
|
||||
|
||||
@ -267,15 +291,16 @@ void VertexLoader::CompileVertexTranslator()
|
||||
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
|
||||
|
||||
// Write vertex position loader
|
||||
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
|
||||
if(g_ActiveConfig.bUseBBox) {
|
||||
WriteCall(UpdateBoundingBoxPrepare);
|
||||
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
|
||||
WriteCall(UpdateBoundingBox);
|
||||
} else {
|
||||
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
|
||||
}
|
||||
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
|
||||
nat_offset += 12;
|
||||
|
||||
// OK, so we just got a point. Let's go back and read it for the bounding box.
|
||||
|
||||
if(g_ActiveConfig.bUseBBox)
|
||||
WriteCall(UpdateBoundingBox);
|
||||
|
||||
// Normals
|
||||
vtx_decl.num_normals = 0;
|
||||
if (m_VtxDesc.Normal != NOT_PRESENT)
|
||||
@ -474,7 +499,8 @@ void VertexLoader::WriteCall(TPipelineFunction func)
|
||||
m_PipelineStages[m_numPipelineStages++] = func;
|
||||
#endif
|
||||
}
|
||||
|
||||
// ARMTODO: This should be done in a better way
|
||||
#ifndef _M_GENERIC
|
||||
void VertexLoader::WriteGetVariable(int bits, OpArg dest, void *address)
|
||||
{
|
||||
#ifdef USE_JIT
|
||||
@ -498,8 +524,9 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
||||
int VertexLoader::SetupRunVertices(int vtx_attr_group, int primitive, int const count)
|
||||
{
|
||||
m_numLoadedVertices += count;
|
||||
|
||||
@ -518,7 +545,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
||||
{
|
||||
// if cull mode is none, ignore triangles and quads
|
||||
DataSkip(count * m_VertexSize);
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
||||
@ -542,157 +569,48 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
||||
for (int i = 0; i < 2; i++)
|
||||
colElements[i] = m_VtxAttr.color[i].Elements;
|
||||
|
||||
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
|
||||
int granularity = 1;
|
||||
switch (primitive) {
|
||||
case 3: // strip .. hm, weird
|
||||
case 4: // fan
|
||||
if (VertexManager::GetRemainingSize() < 3 * native_stride)
|
||||
VertexManager::Flush();
|
||||
break;
|
||||
case 6: // line strip
|
||||
if (VertexManager::GetRemainingSize() < 2 * native_stride)
|
||||
VertexManager::Flush();
|
||||
break;
|
||||
case 0: granularity = 4; break; // quads
|
||||
case 2: granularity = 3; break; // tris
|
||||
case 5: granularity = 2; break; // lines
|
||||
}
|
||||
|
||||
int startv = 0, extraverts = 0;
|
||||
int v = 0;
|
||||
|
||||
//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
|
||||
while (v < count)
|
||||
{
|
||||
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
|
||||
//if (remainingVerts2 - v + startv < remainingVerts)
|
||||
//remainingVerts = remainingVerts2 - v + startv;
|
||||
if (remainingVerts < granularity) {
|
||||
INCSTAT(stats.thisFrame.numBufferSplits);
|
||||
// This buffer full - break current primitive and flush, to switch to the next buffer.
|
||||
u8* plastptr = VertexManager::s_pCurBufferPointer;
|
||||
if (v - startv > 0)
|
||||
VertexManager::AddVertices(primitive, v - startv + extraverts);
|
||||
VertexManager::Flush();
|
||||
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
|
||||
// Why does this need to be so complicated?
|
||||
switch (primitive) {
|
||||
case 3: // triangle strip, copy last two vertices
|
||||
// a little trick since we have to keep track of signs
|
||||
if (v & 1) {
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride*3;
|
||||
extraverts = 3;
|
||||
}
|
||||
else {
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
|
||||
VertexManager::s_pCurBufferPointer += native_stride*2;
|
||||
extraverts = 2;
|
||||
}
|
||||
break;
|
||||
case 4: // tri fan, copy first and last vert
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
extraverts = 2;
|
||||
break;
|
||||
case 6: // line strip
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
extraverts = 1;
|
||||
break;
|
||||
default:
|
||||
extraverts = 0;
|
||||
break;
|
||||
}
|
||||
startv = v;
|
||||
}
|
||||
int remainingPrims = remainingVerts / granularity;
|
||||
remainingVerts = remainingPrims * granularity;
|
||||
if (count - v < remainingVerts)
|
||||
remainingVerts = count - v;
|
||||
|
||||
#ifdef USE_JIT
|
||||
if (remainingVerts > 0) {
|
||||
loop_counter = remainingVerts;
|
||||
((void (*)())(void*)m_compiledCode)();
|
||||
}
|
||||
#else
|
||||
for (int s = 0; s < remainingVerts; s++)
|
||||
{
|
||||
tcIndex = 0;
|
||||
colIndex = 0;
|
||||
s_texmtxwrite = s_texmtxread = 0;
|
||||
for (int i = 0; i < m_numPipelineStages; i++)
|
||||
m_PipelineStages[i]();
|
||||
PRIM_LOG("\n");
|
||||
}
|
||||
#endif
|
||||
v += remainingVerts;
|
||||
}
|
||||
|
||||
if (startv < count)
|
||||
VertexManager::AddVertices(primitive, count - startv + extraverts);
|
||||
VertexManager::PrepareForAdditionalData(primitive, count, native_stride);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
|
||||
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count)
|
||||
{
|
||||
m_numLoadedVertices += count;
|
||||
|
||||
// Flush if our vertex format is different from the currently set.
|
||||
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
|
||||
{
|
||||
// We really must flush here. It's possible that the native representations
|
||||
// of the two vtx formats are the same, but we have no way to easily check that
|
||||
// now.
|
||||
VertexManager::Flush();
|
||||
// Also move the Set() here?
|
||||
}
|
||||
g_nativeVertexFmt = m_NativeFmt;
|
||||
|
||||
if (bpmem.genMode.cullmode == 3 && primitive < 5)
|
||||
{
|
||||
// if cull mode is none, ignore triangles and quads
|
||||
DataSkip(count * m_VertexSize);
|
||||
return;
|
||||
}
|
||||
|
||||
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
||||
|
||||
// Load position and texcoord scale factors.
|
||||
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
|
||||
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
|
||||
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
|
||||
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
|
||||
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
|
||||
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
|
||||
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
|
||||
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
|
||||
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
|
||||
|
||||
pVtxAttr = &m_VtxAttr;
|
||||
posScale = fractionTable[m_VtxAttr.PosFrac];
|
||||
if (m_NativeFmt->m_components & VB_HAS_UVALL)
|
||||
for (int i = 0; i < 8; i++)
|
||||
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
|
||||
for (int i = 0; i < 2; i++)
|
||||
colElements[i] = m_VtxAttr.color[i].Elements;
|
||||
|
||||
if(VertexManager::GetRemainingSize() < native_stride * count)
|
||||
VertexManager::Flush();
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);
|
||||
VertexManager::s_pCurBufferPointer += native_stride * count;
|
||||
DataSkip(count * m_VertexSize);
|
||||
VertexManager::AddVertices(primitive, count);
|
||||
auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count);
|
||||
ConvertVertices(new_count);
|
||||
VertexManager::AddVertices(primitive, new_count);
|
||||
}
|
||||
|
||||
void VertexLoader::ConvertVertices ( int count )
|
||||
{
|
||||
#ifdef USE_JIT
|
||||
if (count > 0) {
|
||||
loop_counter = count;
|
||||
((void (*)())(void*)m_compiledCode)();
|
||||
}
|
||||
#else
|
||||
for (int s = 0; s < count; s++)
|
||||
{
|
||||
tcIndex = 0;
|
||||
colIndex = 0;
|
||||
s_texmtxwrite = s_texmtxread = 0;
|
||||
for (int i = 0; i < m_numPipelineStages; i++)
|
||||
m_PipelineStages[i]();
|
||||
PRIM_LOG("\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int const count, u8* Data)
|
||||
{
|
||||
auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count);
|
||||
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * new_count);
|
||||
VertexManager::s_pCurBufferPointer += native_stride * new_count;
|
||||
DataSkip(new_count * m_VertexSize);
|
||||
|
||||
VertexManager::AddVertices(primitive, new_count);
|
||||
}
|
||||
|
||||
void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2)
|
||||
{
|
||||
|
@ -76,13 +76,20 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
// ARMTODO: This should be done in a better way
|
||||
#ifndef _M_GENERIC
|
||||
class VertexLoader : public Gen::XCodeBlock, NonCopyable
|
||||
#else
|
||||
class VertexLoader
|
||||
#endif
|
||||
{
|
||||
public:
|
||||
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
|
||||
~VertexLoader();
|
||||
|
||||
int GetVertexSize() const {return m_VertexSize;}
|
||||
|
||||
int SetupRunVertices(int vtx_attr_group, int primitive, int const count);
|
||||
void RunVertices(int vtx_attr_group, int primitive, int count);
|
||||
void RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data);
|
||||
|
||||
@ -119,11 +126,14 @@ private:
|
||||
void SetVAT(u32 _group0, u32 _group1, u32 _group2);
|
||||
|
||||
void CompileVertexTranslator();
|
||||
void ConvertVertices(int count);
|
||||
|
||||
void WriteCall(TPipelineFunction);
|
||||
|
||||
#ifndef _M_GENERIC
|
||||
void WriteGetVariable(int bits, Gen::OpArg dest, void *address);
|
||||
void WriteSetVariable(int bits, void *address, Gen::OpArg dest);
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -15,9 +15,6 @@
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _VERTEXLOADERCOLOR_H
|
||||
#define _VERTEXLOADERCOLOR_H
|
||||
|
||||
#include "Common.h"
|
||||
#include "VideoCommon.h"
|
||||
#include "LookUpTables.h"
|
||||
@ -37,8 +34,7 @@ extern int colElements[2];
|
||||
|
||||
// Emits one packed 32-bit colour into the output vertex and moves on to the
// next colour slot.
//
// val: the colour value to write, already in the layout the output expects.
__forceinline void _SetCol(u32 val)
{
	// The colour must be written exactly once. DataWrite is presumably
	// responsible for both storing the value and advancing the output pointer
	// (TODO confirm against its definition), so no manual store/advance is
	// done here.
	DataWrite(val);
	colIndex++;
}
|
||||
|
||||
@ -132,80 +128,65 @@ void LOADERDECL Color_ReadDirect_32b_8888()
|
||||
_SetCol(col);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void LOADERDECL Color_ReadIndex8_16b_565()
|
||||
template <typename I>
|
||||
void Color_ReadIndex_16b_565()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
auto const Index = DataRead<I>();
|
||||
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
|
||||
_SetCol565(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_24b_888()
|
||||
|
||||
template <typename I>
|
||||
void Color_ReadIndex_24b_888()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
auto const Index = DataRead<I>();
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_32b_888x()
|
||||
|
||||
template <typename I>
|
||||
void Color_ReadIndex_32b_888x()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
auto const Index = DataRead<I>();
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_16b_4444()
|
||||
|
||||
template <typename I>
|
||||
void Color_ReadIndex_16b_4444()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
auto const Index = DataRead<I>();
|
||||
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
|
||||
_SetCol4444(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_24b_6666()
|
||||
|
||||
template <typename I>
|
||||
void Color_ReadIndex_24b_6666()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
auto const Index = DataRead<I>();
|
||||
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
|
||||
u32 val = Common::swap32(pData);
|
||||
_SetCol6666(val);
|
||||
}
|
||||
void LOADERDECL Color_ReadIndex8_32b_8888()
|
||||
|
||||
template <typename I>
|
||||
void Color_ReadIndex_32b_8888()
|
||||
{
|
||||
u8 Index = DataReadU8();
|
||||
auto const Index = DataRead<I>();
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read32(iAddress));
|
||||
}
|
||||
// 16-bit index, 565 colour: fetch the indexed entry, byte-swap it and emit it.
void LOADERDECL Color_ReadIndex16_16b_565()
{
	const u16 idx = DataReadU16();
	auto const entry = cached_arraybases[ARRAY_COLOR + colIndex] + (idx * arraystrides[ARRAY_COLOR + colIndex]);
	const u16 raw = *(const u16*)entry;
	_SetCol565(Common::swap16(raw));
}
|
||||
// 16-bit index, 888 colour: decode the indexed entry with _Read24.
void LOADERDECL Color_ReadIndex16_24b_888()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_COLOR + colIndex] + (idx * arraystrides[ARRAY_COLOR + colIndex]);
	const u32 colour = _Read24(entry);
	_SetCol(colour);
}
|
||||
// 16-bit index, 888x colour: decoded with _Read24, like the 888 format.
void LOADERDECL Color_ReadIndex16_32b_888x()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_COLOR + colIndex] + (idx * arraystrides[ARRAY_COLOR + colIndex]);
	const u32 colour = _Read24(entry);
	_SetCol(colour);
}
|
||||
// 16-bit index, 4444 colour: the raw 16-bit entry is passed on unswapped.
void LOADERDECL Color_ReadIndex16_16b_4444()
{
	const u16 idx = DataReadU16();
	auto const entry = cached_arraybases[ARRAY_COLOR + colIndex] + (idx * arraystrides[ARRAY_COLOR + colIndex]);
	const u16 raw = *(const u16*)entry;
	_SetCol4444(raw);
}
|
||||
// 16-bit index, 6666 colour: a swapped 32-bit load that starts one byte
// before the indexed entry feeds _SetCol6666.
void LOADERDECL Color_ReadIndex16_24b_6666()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_COLOR + colIndex] + (idx * arraystrides[ARRAY_COLOR + colIndex]);
	const u8* const load_from = entry - 1;
	_SetCol6666(Common::swap32(load_from));
}
|
||||
// 16-bit index, 8888 colour: decode the indexed entry with _Read32.
void LOADERDECL Color_ReadIndex16_32b_8888()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_COLOR + colIndex] + (idx * arraystrides[ARRAY_COLOR + colIndex]);
	const u32 colour = _Read32(entry);
	_SetCol(colour);
}
|
||||
#endif
|
||||
|
||||
void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565<u8>(); }
|
||||
void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888<u8>(); }
|
||||
void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x<u8>(); }
|
||||
void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444<u8>(); }
|
||||
void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666<u8>(); }
|
||||
void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888<u8>(); }
|
||||
|
||||
void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565<u16>(); }
|
||||
void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888<u16>(); }
|
||||
void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x<u16>(); }
|
||||
void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444<u16>(); }
|
||||
void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666<u16>(); }
|
||||
void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888<u16>(); }
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "VertexManagerBase.h"
|
||||
#include "CPUDetect.h"
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
#include <smmintrin.h>
|
||||
@ -30,78 +31,163 @@
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
|
||||
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
__forceinline float FracAdjust(T val)
|
||||
{
|
||||
//auto const S8FRAC = 1.f / (1u << 6);
|
||||
//auto const U8FRAC = 1.f / (1u << 7);
|
||||
//auto const S16FRAC = 1.f / (1u << 14);
|
||||
//auto const U16FRAC = 1.f / (1u << 15);
|
||||
|
||||
// TODO: is this right?
|
||||
return val / float(1u << (sizeof(T) * 8 - std::numeric_limits<T>::is_signed - 1));
|
||||
}
|
||||
|
||||
template <>
|
||||
__forceinline float FracAdjust(float val)
|
||||
{ return val; }
|
||||
|
||||
template <typename T, int N>
|
||||
__forceinline void ReadIndirect(const T* data)
|
||||
{
|
||||
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
|
||||
|
||||
for (int i = 0; i != N; ++i)
|
||||
{
|
||||
DataWrite(FracAdjust(Common::FromBigEndian(data[i])));
|
||||
}
|
||||
|
||||
LOG_NORM();
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
struct Normal_Direct
|
||||
{
|
||||
static void LOADERDECL function()
|
||||
{
|
||||
auto const source = reinterpret_cast<const T*>(DataGetPosition());
|
||||
ReadIndirect<T, N * 3>(source);
|
||||
DataSkip<N * 3 * sizeof(T)>();
|
||||
}
|
||||
|
||||
static const int size = sizeof(T) * N * 3;
|
||||
};
|
||||
|
||||
template <typename I, typename T, int N, int Offset>
|
||||
__forceinline void Normal_Index_Offset()
|
||||
{
|
||||
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
||||
|
||||
auto const index = DataRead<I>();
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL]
|
||||
+ (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
|
||||
ReadIndirect<T, N * 3>(data);
|
||||
}
|
||||
|
||||
template <typename I, typename T, int N>
|
||||
struct Normal_Index
|
||||
{
|
||||
static void LOADERDECL function()
|
||||
{
|
||||
Normal_Index_Offset<I, T, N, 0>();
|
||||
}
|
||||
|
||||
static const int size = sizeof(I);
|
||||
};
|
||||
|
||||
template <typename I, typename T>
|
||||
struct Normal_Index_Indices3
|
||||
{
|
||||
static void LOADERDECL function()
|
||||
{
|
||||
Normal_Index_Offset<I, T, 1, 0>();
|
||||
Normal_Index_Offset<I, T, 1, 1>();
|
||||
Normal_Index_Offset<I, T, 1, 2>();
|
||||
}
|
||||
|
||||
static const int size = sizeof(I) * 3;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void VertexLoader_Normal::Init(void)
|
||||
{
|
||||
// HACK is for signed instead of unsigned to prevent crashes.
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
|
||||
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
|
||||
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index8_Byte3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index8_Short3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index8_Short3_Indices1);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index8_Float3_Indices1);
|
||||
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index8_Byte3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index8_Short3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index8_Short3_Indices3);
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index8_Float3_Indices3);
|
||||
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index16_Byte3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index16_Short3_Indices1); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index16_Short3_Indices1);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index16_Float3_Indices1);
|
||||
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index16_Byte3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3);
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3);
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
|
||||
|
||||
// Same as above
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
|
||||
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
|
||||
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>();
|
||||
|
||||
// Same as above for NRM_NBT
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>();
|
||||
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();
|
||||
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>();
|
||||
|
||||
// Same as above for NRM_NBT
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
|
||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
|
||||
}
|
||||
|
||||
unsigned int VertexLoader_Normal::GetSize(unsigned int _type,
|
||||
@ -116,312 +202,3 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type,
|
||||
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
|
||||
return pFunc;
|
||||
}
|
||||
|
||||
// These fracs are fixed according to the component format.
// The macro bodies intentionally carry no trailing semicolon, so they can be
// used inside larger expressions as well as at the end of a statement.
#define S8FRAC 0.015625f // 1.0f / (1U << 6)
#define S16FRAC 0.00006103515625f // 1.0f / (1U << 14)
|
||||
// --- Direct ---
|
||||
|
||||
inline void ReadIndirectS8x3(const s8* pData)
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORM();
|
||||
}
|
||||
|
||||
inline void ReadIndirectS8x9(const s8* pData)
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
|
||||
LOG_NORM();
|
||||
((float*)VertexManager::s_pCurBufferPointer)[3] = pData[3] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[4] = pData[4] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[5] = pData[5] * S8FRAC;
|
||||
LOG_NORM();
|
||||
((float*)VertexManager::s_pCurBufferPointer)[6] = pData[6] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[7] = pData[7] * S8FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[8] = pData[8] * S8FRAC;
|
||||
LOG_NORM();
|
||||
VertexManager::s_pCurBufferPointer += 36;
|
||||
}
|
||||
|
||||
inline void ReadIndirectS16x3(const u16* pData)
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORM()
|
||||
}
|
||||
|
||||
inline void ReadIndirectS16x9(const u16* pData)
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
|
||||
LOG_NORM()
|
||||
((float*)VertexManager::s_pCurBufferPointer)[3] = ((s16)Common::swap16(pData[3])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[4] = ((s16)Common::swap16(pData[4])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[5] = ((s16)Common::swap16(pData[5])) * S16FRAC;
|
||||
LOG_NORM()
|
||||
((float*)VertexManager::s_pCurBufferPointer)[6] = ((s16)Common::swap16(pData[6])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[7] = ((s16)Common::swap16(pData[7])) * S16FRAC;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[8] = ((s16)Common::swap16(pData[8])) * S16FRAC;
|
||||
LOG_NORM()
|
||||
VertexManager::s_pCurBufferPointer += 36;
|
||||
}
|
||||
|
||||
inline void ReadIndirectFloatx3(const u32* pData)
|
||||
{
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
LOG_NORM();
|
||||
}
|
||||
|
||||
inline void ReadIndirectFloatx9(const u32* pData)
|
||||
{
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
|
||||
LOG_NORM();
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[3] = Common::swap32(pData[3]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[4] = Common::swap32(pData[4]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[5] = Common::swap32(pData[5]);
|
||||
LOG_NORM();
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[6] = Common::swap32(pData[6]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[7] = Common::swap32(pData[7]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[8] = Common::swap32(pData[8]);
|
||||
LOG_NORM();
|
||||
VertexManager::s_pCurBufferPointer += 36;
|
||||
}
|
||||
|
||||
inline void ReadDirectS8x3()
|
||||
{
|
||||
const s8* Source = (const s8*)DataGetPosition();
|
||||
ReadIndirectS8x3(Source);
|
||||
DataSkip(3);
|
||||
}
|
||||
|
||||
inline void ReadDirectS8x9()
|
||||
{
|
||||
const s8* Source = (const s8*)DataGetPosition();
|
||||
ReadIndirectS8x9(Source);
|
||||
DataSkip(9);
|
||||
}
|
||||
|
||||
inline void ReadDirectS16x3()
|
||||
{
|
||||
const u16* Source = (const u16*)DataGetPosition();
|
||||
ReadIndirectS16x3(Source);
|
||||
DataSkip(6);
|
||||
}
|
||||
|
||||
inline void ReadDirectS16x9()
|
||||
{
|
||||
const u16* Source = (const u16*)DataGetPosition();
|
||||
ReadIndirectS16x9(Source);
|
||||
DataSkip(18);
|
||||
}
|
||||
|
||||
inline void ReadDirectFloatx3()
|
||||
{
|
||||
const u32* Source = (const u32*)DataGetPosition();
|
||||
ReadIndirectFloatx3(Source);
|
||||
DataSkip(12);
|
||||
}
|
||||
|
||||
inline void ReadDirectFloatx9()
|
||||
{
|
||||
const u32* Source = (const u32*)DataGetPosition();
|
||||
ReadIndirectFloatx9(Source);
|
||||
DataSkip(36);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Direct s8 normal: convert 3 components in place in the stream, consume 3 bytes.
void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
{
	const s8* const src = (const s8*)DataGetPosition();
	ReadIndirectS8x3(src);
	DataSkip(3);
}
|
||||
|
||||
// Direct 16-bit normal: convert 3 components in place in the stream, consume 6 bytes.
void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
{
	const u16* const src = (const u16*)DataGetPosition();
	ReadIndirectS16x3(src);
	DataSkip(6);
}
|
||||
|
||||
// Direct float normal: convert 3 components in place in the stream, consume 12 bytes.
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat()
{
	const u32* const src = (const u32*)DataGetPosition();
	ReadIndirectFloatx3(src);
	DataSkip(12);
}
|
||||
|
||||
// Direct s8 normal triple: convert 9 components in place in the stream, consume 9 bytes.
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
{
	const s8* const src = (const s8*)DataGetPosition();
	ReadIndirectS8x9(src);
	DataSkip(9);
}
|
||||
|
||||
// Direct 16-bit normal triple: convert 9 components in place in the stream, consume 18 bytes.
void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
{
	const u16* const src = (const u16*)DataGetPosition();
	ReadIndirectS16x9(src);
	DataSkip(18);
}
|
||||
|
||||
// Direct float normal triple: convert 9 components in place in the stream, consume 36 bytes.
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
{
	const u32* const src = (const u32*)DataGetPosition();
	ReadIndirectFloatx9(src);
	DataSkip(36);
}
|
||||
|
||||
|
||||
// --- Index8 ---
|
||||
|
||||
// 8-bit index, s8 components: convert one normal from the indexed array entry.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
{
	const u8 idx = DataReadU8();
	auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x3((const s8*)entry);
}
|
||||
|
||||
// 8-bit index, 16-bit components: convert one normal from the indexed array entry.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
{
	const u8 idx = DataReadU8();
	auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x3((const u16*)entry);
}
|
||||
|
||||
// 8-bit index, float components: convert one normal from the indexed array entry.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
{
	const u8 idx = DataReadU8();
	auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx3((const u32*)entry);
}
|
||||
|
||||
// 8-bit index, s8 components: a single index addresses all three normals.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
{
	const u8 idx = DataReadU8();
	auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x9((const s8*)entry);
}
|
||||
|
||||
// 8-bit index, 16-bit components: a single index addresses all three normals.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
{
	const u8 idx = DataReadU8();
	auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x9((const u16*)entry);
}
|
||||
|
||||
// 8-bit index, float components: a single index addresses all three normals.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
{
	const u8 idx = DataReadU8();
	auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx9((const u32*)entry);
}
|
||||
|
||||
// 8-bit indices, s8 components: three indices are read; the n-th one picks
// the n-th normal (3 bytes each) out of the entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
{
	for (int n = 0; n < 3; ++n)
	{
		const u8 idx = DataReadU8();
		auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]) + 1*3*n;
		ReadIndirectS8x3((const s8*)entry);
	}
}
|
||||
|
||||
|
||||
// 8-bit indices, 16-bit components: three indices are read; the n-th one
// picks the n-th normal (6 bytes each) out of the entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
{
	for (int n = 0; n < 3; ++n)
	{
		const u8 idx = DataReadU8();
		auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]) + 2*3*n;
		ReadIndirectS16x3((const u16*)entry);
	}
}
|
||||
|
||||
// 8-bit indices, float components: three indices are read; the n-th one
// picks the n-th normal (12 bytes each) out of the entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
{
	for (int n = 0; n < 3; ++n)
	{
		const u8 idx = DataReadU8();
		auto const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]) + 4*3*n;
		ReadIndirectFloatx3((const u32*)entry);
	}
}
|
||||
|
||||
|
||||
// --- Index16 ---
|
||||
|
||||
|
||||
// Indexed normal loader: takes a u16 index from DataReadU16(), addresses the
// ARRAY_NORMAL entry at base + index * stride and passes it to ReadIndirectS8x3().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
{
	const u16 idx = DataReadU16();
	const s8* src = reinterpret_cast<const s8*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x3(src);
}
|
||||
|
||||
// Indexed normal loader: takes a u16 index from DataReadU16(), addresses the
// ARRAY_NORMAL entry at base + index * stride and passes it to ReadIndirectS16x3().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
{
	const u16 idx = DataReadU16();
	const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x3(src);
}
|
||||
|
||||
// Indexed normal loader: takes a u16 index from DataReadU16(), addresses the
// ARRAY_NORMAL entry at base + index * stride and passes it to ReadIndirectFloatx3().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
{
	const u16 idx = DataReadU16();
	const u32* src = reinterpret_cast<const u32*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx3(src);
}
|
||||
|
||||
// Indexed normal loader: one u16 index (DataReadU16()) selects the ARRAY_NORMAL
// entry at base + index * stride, which is passed to ReadIndirectS8x9().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
{
	const u16 idx = DataReadU16();
	const s8* src = reinterpret_cast<const s8*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x9(src);
}
|
||||
|
||||
// Indexed normal loader: one u16 index (DataReadU16()) selects the ARRAY_NORMAL
// entry at base + index * stride, which is passed to ReadIndirectS16x9().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
{
	const u16 idx = DataReadU16();
	const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x9(src);
}
|
||||
|
||||
// Indexed normal loader: one u16 index (DataReadU16()) selects the ARRAY_NORMAL
// entry at base + index * stride, which is passed to ReadIndirectFloatx9().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
{
	const u16 idx = DataReadU16();
	const u32* src = reinterpret_cast<const u32*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx9(src);
}
|
||||
|
||||
// Indexed normal loader with three separate u16 indices. For the n-th index the
// source address is base + index * stride + 3 * n bytes (the original wrote the
// offset as 1*3*i), and the result is passed to ReadIndirectS8x3().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
{
	for (int n = 0; n != 3; ++n)
	{
		const u16 idx = DataReadU16();
		const s8* src = reinterpret_cast<const s8*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL] + 3 * n);
		ReadIndirectS8x3(src);
	}
}
|
||||
|
||||
// Indexed normal loader with three separate u16 indices. For the n-th index the
// source address is base + index * stride + 6 * n bytes (the original wrote the
// offset as 2*3*i), and the result is passed to ReadIndirectS16x3().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
{
	for (int n = 0; n != 3; ++n)
	{
		const u16 idx = DataReadU16();
		const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL] + 6 * n);
		ReadIndirectS16x3(src);
	}
}
|
||||
|
||||
// Indexed normal loader with three separate u16 indices. For the n-th index the
// source address is base + index * stride + 12 * n bytes (the original wrote the
// offset as 4*3*i), and the result is passed to ReadIndirectFloatx3().
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
{
	for (int n = 0; n != 3; ++n)
	{
		const u16 idx = DataReadU16();
		const u32* src = reinterpret_cast<const u32*>(cached_arraybases[ARRAY_NORMAL] + idx * arraystrides[ARRAY_NORMAL] + 12 * n);
		ReadIndirectFloatx3(src);
	}
}
|
||||
|
@ -70,45 +70,20 @@ private:
|
||||
NUM_NRM_INDICES
|
||||
};
|
||||
|
||||
struct Set {
|
||||
Set() {}
|
||||
Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {}
|
||||
struct Set
|
||||
{
|
||||
template <typename T>
|
||||
void operator=(const T&)
|
||||
{
|
||||
gc_size = T::size;
|
||||
function = T::function;
|
||||
}
|
||||
|
||||
int gc_size;
|
||||
TPipelineFunction function;
|
||||
// int pc_size;
|
||||
};
|
||||
|
||||
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
|
||||
|
||||
// direct
|
||||
static void LOADERDECL Normal_DirectByte();
|
||||
static void LOADERDECL Normal_DirectShort();
|
||||
static void LOADERDECL Normal_DirectFloat();
|
||||
static void LOADERDECL Normal_DirectByte3();
|
||||
static void LOADERDECL Normal_DirectShort3();
|
||||
static void LOADERDECL Normal_DirectFloat3();
|
||||
|
||||
// index8
|
||||
static void LOADERDECL Normal_Index8_Byte();
|
||||
static void LOADERDECL Normal_Index8_Short();
|
||||
static void LOADERDECL Normal_Index8_Float();
|
||||
static void LOADERDECL Normal_Index8_Byte3_Indices1();
|
||||
static void LOADERDECL Normal_Index8_Short3_Indices1();
|
||||
static void LOADERDECL Normal_Index8_Float3_Indices1();
|
||||
static void LOADERDECL Normal_Index8_Byte3_Indices3();
|
||||
static void LOADERDECL Normal_Index8_Short3_Indices3();
|
||||
static void LOADERDECL Normal_Index8_Float3_Indices3();
|
||||
|
||||
// index16
|
||||
static void LOADERDECL Normal_Index16_Byte();
|
||||
static void LOADERDECL Normal_Index16_Short();
|
||||
static void LOADERDECL Normal_Index16_Float();
|
||||
static void LOADERDECL Normal_Index16_Byte3_Indices1();
|
||||
static void LOADERDECL Normal_Index16_Short3_Indices1();
|
||||
static void LOADERDECL Normal_Index16_Float3_Indices1();
|
||||
static void LOADERDECL Normal_Index16_Byte3_Indices3();
|
||||
static void LOADERDECL Normal_Index16_Short3_Indices3();
|
||||
static void LOADERDECL Normal_Index16_Float3_Indices3();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -15,6 +15,8 @@
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "Common.h"
|
||||
#include "VideoCommon.h"
|
||||
#include "VertexLoader.h"
|
||||
@ -71,101 +73,42 @@ MOVUPS(MOffset(EDI, 0), XMM0);
|
||||
|
||||
*/
|
||||
|
||||
// ==============================================================================
|
||||
// Direct
|
||||
// ==============================================================================
|
||||
|
||||
template <class T, bool three>
|
||||
void Pos_ReadDirect()
|
||||
template <typename T>
|
||||
float PosScale(T val)
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead<T>() * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead<T>() * posScale;
|
||||
if (three)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead<T>() * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
return val * posScale;
|
||||
}
|
||||
|
||||
template <>
|
||||
float PosScale(float val)
|
||||
{ return val; }
|
||||
|
||||
template <typename T, int N>
|
||||
void LOADERDECL Pos_ReadDirect()
|
||||
{
|
||||
static_assert(N <= 3, "N > 3 is not sane!");
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
DataWrite(i<N ? PosScale(DataRead<T>()) : 0.f);
|
||||
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
|
||||
void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); }
|
||||
void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); }
|
||||
void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); }
|
||||
void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect<s16, true>(); }
|
||||
void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect<u8, false>(); }
|
||||
void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect<s8, false>(); }
|
||||
void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect<u16, false>(); }
|
||||
void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect<s16, false>(); }
|
||||
|
||||
void LOADERDECL Pos_ReadDirect_Float3()
|
||||
template <typename I, typename T, int N>
|
||||
void LOADERDECL Pos_ReadIndex()
|
||||
{
|
||||
// No need to use floating point here.
|
||||
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
|
||||
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
|
||||
((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
|
||||
// Direct two-component float position: stores two words from DataReadU32()
// followed by a zero third word, then advances the output pointer by 12 bytes.
void LOADERDECL Pos_ReadDirect_Float2()
{
	// The 32-bit words are copied as integers; no floating point math is needed.
	u32* const dst = reinterpret_cast<u32*>(VertexManager::s_pCurBufferPointer);
	dst[0] = DataReadU32();
	dst[1] = DataReadU32();
	dst[2] = 0;
	LOG_VTX();
	VertexManager::s_pCurBufferPointer += 12;
}
|
||||
|
||||
|
||||
template<class T, bool three,int MaxSize>
|
||||
inline void Pos_ReadIndex_Byte(int Index)
|
||||
{
|
||||
if(Index < MaxSize)
|
||||
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
||||
static_assert(N <= 3, "N > 3 is not sane!");
|
||||
|
||||
auto const index = DataRead<I>();
|
||||
if (index < std::numeric_limits<I>::max())
|
||||
{
|
||||
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
|
||||
if (three)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
DataWrite(i<N ? PosScale(Common::FromBigEndian(data[i])) : 0.f);
|
||||
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, bool three,int MaxSize>
|
||||
inline void Pos_ReadIndex_Short(int Index)
|
||||
{
|
||||
if(Index < MaxSize)
|
||||
{
|
||||
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
|
||||
if (three)
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
|
||||
template<bool three,int MaxSize>
|
||||
void Pos_ReadIndex_Float(int Index)
|
||||
{
|
||||
if(Index < MaxSize)
|
||||
{
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
|
||||
if (three)
|
||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
|
||||
else
|
||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
|
||||
@ -173,87 +116,22 @@ void Pos_ReadIndex_Float(int Index)
|
||||
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
|
||||
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
|
||||
|
||||
template<bool three,int MaxSize>
|
||||
void Pos_ReadIndex_Float_SSSE3(int Index)
|
||||
template <typename I, bool three>
|
||||
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
|
||||
{
|
||||
if(Index < MaxSize)
|
||||
auto const index = DataRead<I>();
|
||||
if (index < std::numeric_limits<I>::max())
|
||||
{
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
|
||||
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
|
||||
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
|
||||
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||
VertexManager::s_pCurBufferPointer += sizeof(float) * 3;
|
||||
LOG_VTX();
|
||||
VertexManager::s_pCurBufferPointer += 12;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Explicitly instantiate these functions to decrease the possibility of
|
||||
// symbol binding problems when (only) calling them from JIT compiled code.
|
||||
template void Pos_ReadDirect<u8, true>();
|
||||
template void Pos_ReadDirect<s8, true>();
|
||||
template void Pos_ReadDirect<u16, true>();
|
||||
template void Pos_ReadDirect<s16, true>();
|
||||
template void Pos_ReadDirect<u8, false>();
|
||||
template void Pos_ReadDirect<s8, false>();
|
||||
template void Pos_ReadDirect<u16, false>();
|
||||
template void Pos_ReadDirect<s16, false>();
|
||||
template void Pos_ReadIndex_Byte<u8, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Float<true, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Float<false, 255>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Float<true, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Byte<u8, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Byte<s8, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<u16, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Short<s16, false, 65535>(int Index);
|
||||
template void Pos_ReadIndex_Float<false, 65535>(int Index);
|
||||
|
||||
// ==============================================================================
|
||||
// Index 8
|
||||
// ==============================================================================
|
||||
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true, 255> (DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false, 255>(DataReadU8());}
|
||||
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false, 255> (DataReadU8());}
|
||||
|
||||
// ==============================================================================
|
||||
// Index 16
|
||||
// ==============================================================================
|
||||
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true, 65535> (DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false, 65535>(DataReadU16());}
|
||||
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false, 65535> (DataReadU16());}
|
||||
|
||||
#if _M_SSE >= 0x301
// SSSE3 entry points for indexed float positions; compiled only when
// _M_SSE >= 0x301. Each forwards the index it reads to Pos_ReadIndex_Float_SSSE3.
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<true, 255>(DataReadU8());
}

void LOADERDECL Pos_ReadIndex8_Float2_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<false, 255>(DataReadU8());
}

void LOADERDECL Pos_ReadIndex16_Float3_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<true, 65535>(DataReadU16());
}

void LOADERDECL Pos_ReadIndex16_Float2_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<false, 65535>(DataReadU16());
}
#endif
|
||||
|
||||
static TPipelineFunction tableReadPosition[4][8][2] = {
|
||||
{
|
||||
{NULL, NULL,},
|
||||
@ -263,56 +141,40 @@ static TPipelineFunction tableReadPosition[4][8][2] = {
|
||||
{NULL, NULL,},
|
||||
},
|
||||
{
|
||||
{Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3,},
|
||||
{Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3,},
|
||||
{Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3,},
|
||||
{Pos_ReadDirect_Short2, Pos_ReadDirect_Short3,},
|
||||
{Pos_ReadDirect_Float2, Pos_ReadDirect_Float3,},
|
||||
{Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>,},
|
||||
{Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>,},
|
||||
{Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>,},
|
||||
{Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>,},
|
||||
{Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>,},
|
||||
},
|
||||
{
|
||||
{Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3,},
|
||||
{Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3,},
|
||||
{Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3,},
|
||||
{Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3,},
|
||||
{Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3,},
|
||||
{Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>,},
|
||||
{Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>,},
|
||||
{Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>,},
|
||||
{Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>,},
|
||||
{Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>,},
|
||||
},
|
||||
{
|
||||
{Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3,},
|
||||
{Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3,},
|
||||
{Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3,},
|
||||
{Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3,},
|
||||
{Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3,},
|
||||
{Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>,},
|
||||
{Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>,},
|
||||
{Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>,},
|
||||
{Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>,},
|
||||
{Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>,},
|
||||
},
|
||||
};
|
||||
|
||||
static int tableReadPositionVertexSize[4][8][2] = {
|
||||
{
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
|
||||
},
|
||||
{
|
||||
{2, 3,},
|
||||
{2, 3,},
|
||||
{4, 6,},
|
||||
{4, 6,},
|
||||
{8, 12,},
|
||||
{2, 3,}, {2, 3,}, {4, 6,}, {4, 6,}, {8, 12,},
|
||||
},
|
||||
{
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
|
||||
},
|
||||
{
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
|
||||
},
|
||||
};
|
||||
|
||||
@ -322,10 +184,10 @@ void VertexLoader_Position::Init(void) {
|
||||
#if _M_SSE >= 0x301
|
||||
|
||||
if (cpu_info.bSSSE3) {
|
||||
tableReadPosition[2][4][0] = Pos_ReadIndex8_Float2_SSSE3;
|
||||
tableReadPosition[2][4][1] = Pos_ReadIndex8_Float3_SSSE3;
|
||||
tableReadPosition[3][4][0] = Pos_ReadIndex16_Float2_SSSE3;
|
||||
tableReadPosition[3][4][1] = Pos_ReadIndex16_Float3_SSSE3;
|
||||
tableReadPosition[2][4][0] = Pos_ReadIndex_Float_SSSE3<u8, false>;
|
||||
tableReadPosition[2][4][1] = Pos_ReadIndex_Float_SSSE3<u8, true>;
|
||||
tableReadPosition[3][4][0] = Pos_ReadIndex_Float_SSSE3<u16, false>;
|
||||
tableReadPosition[3][4][1] = Pos_ReadIndex_Float_SSSE3<u16, true>;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -28,8 +28,22 @@
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
// Logging hook for one texture coordinate. Everything after the macro name is a
// comment, so the macro expands to nothing.
#define LOG_TEX1() // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]);
|
||||
// Two-component counterpart of LOG_TEX1; likewise expands to nothing.
#define LOG_TEX2() // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]);
|
||||
template <int N>
|
||||
void LOG_TEX();
|
||||
|
||||
template <>
|
||||
__forceinline void LOG_TEX<1>()
|
||||
{
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
// PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
}
|
||||
|
||||
template <>
|
||||
__forceinline void LOG_TEX<2>()
|
||||
{
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
// PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
}
|
||||
|
||||
extern int tcIndex;
|
||||
extern float tcScale[8];
|
||||
@ -39,279 +53,54 @@ void LOADERDECL TexCoord_Read_Dummy()
|
||||
tcIndex++;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_UByte1()
|
||||
template <typename T>
|
||||
float TCScale(T val)
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadDirect_UByte2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
return val * tcScale[tcIndex];
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_Byte1()
|
||||
template <>
|
||||
float TCScale(float val)
|
||||
{ return val; }
|
||||
|
||||
template <typename T, int N>
|
||||
void LOADERDECL TexCoord_ReadDirect()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
void LOADERDECL TexCoord_ReadDirect_Byte2()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
for (int i = 0; i != N; ++i)
|
||||
DataWrite(TCScale(DataRead<T>()));
|
||||
|
||||
LOG_TEX<N>();
|
||||
|
||||
++tcIndex;
|
||||
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadDirect_UShort1()
|
||||
template <typename I, typename T, int N>
|
||||
void LOADERDECL TexCoord_ReadIndex()
|
||||
{
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
|
||||
LOG_TEX1();
|
||||
VertexManager::s_pCurBufferPointer += 4;
|
||||
tcIndex++;
|
||||
}
|
||||
// Direct texcoord, two components: each DataReadU16() value is converted to
// float and multiplied by tcScale[tcIndex]. Advances the output pointer by
// 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadDirect_UShort2()
{
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(DataReadU16()) * tcScale[tcIndex];
	dst[1] = static_cast<float>(DataReadU16()) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Direct texcoord, one component: the DataReadU16() value is reinterpreted as
// s16, converted to float and multiplied by tcScale[tcIndex]. Advances the
// output pointer by 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadDirect_Short1()
{
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s16>(DataReadU16())) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Direct texcoord, two components: each DataReadU16() value is reinterpreted
// as s16, converted to float and multiplied by tcScale[tcIndex]. Advances the
// output pointer by 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadDirect_Short2()
{
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s16>(DataReadU16())) * tcScale[tcIndex];
	dst[1] = static_cast<float>(static_cast<s16>(DataReadU16())) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Direct texcoord, one component: stores the word from DataReadU32() unchanged
// (no scaling). Advances the output pointer by 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadDirect_Float1()
{
	u32* const dst = reinterpret_cast<u32*>(VertexManager::s_pCurBufferPointer);
	dst[0] = DataReadU32();
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Direct texcoord, two components: stores two words from DataReadU32()
// unchanged (no scaling). Advances the output pointer by 8 bytes and
// increments tcIndex.
void LOADERDECL TexCoord_ReadDirect_Float2()
{
	u32* const dst = reinterpret_cast<u32*>(VertexManager::s_pCurBufferPointer);
	dst[0] = DataReadU32();
	dst[1] = DataReadU32();
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// ==================================================================================
|
||||
// Indexed texcoord (u8 index, one u8 component): the source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is converted to
// float and multiplied by tcScale[tcIndex]. Advances the output pointer by
// 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_UByte1()
{
	const u8 idx = DataReadU8();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(src[0]) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Indexed texcoord (u8 index, two u8 components): each source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is converted to
// float and multiplied by tcScale[tcIndex]. Advances the output pointer by
// 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_UByte2()
{
	const u8 idx = DataReadU8();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(src[0]) * tcScale[tcIndex];
	dst[1] = static_cast<float>(src[1]) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Indexed texcoord (u8 index, one s8 component): the source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is reinterpreted as
// s8, converted to float and multiplied by tcScale[tcIndex]. Advances the
// output pointer by 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_Byte1()
{
	const u8 idx = DataReadU8();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s8>(src[0])) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Indexed texcoord (u8 index, two s8 components): each source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is reinterpreted as
// s8, converted to float and multiplied by tcScale[tcIndex]. Advances the
// output pointer by 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_Byte2()
{
	const u8 idx = DataReadU8();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s8>(src[0])) * tcScale[tcIndex];
	dst[1] = static_cast<float>(static_cast<s8>(src[1])) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Indexed texcoord (u8 index, one u16 component): the source value at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is byte-swapped with
// Common::swap16, converted to float and multiplied by tcScale[tcIndex].
// Advances the output pointer by 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_UShort1()
{
	const u8 idx = DataReadU8();
	const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<u16>(Common::swap16(src[0]))) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Indexed texcoord (u8 index, two u16 components): each source value at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is byte-swapped with
// Common::swap16, converted to float and multiplied by tcScale[tcIndex].
// Advances the output pointer by 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_UShort2()
{
	const u8 idx = DataReadU8();
	const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<u16>(Common::swap16(src[0]))) * tcScale[tcIndex];
	dst[1] = static_cast<float>(static_cast<u16>(Common::swap16(src[1]))) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Indexed texcoord (u8 index, one s16 component): the source value at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is byte-swapped with
// Common::swap16, cast to s16, converted to float and multiplied by
// tcScale[tcIndex]. Advances the output pointer by 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_Short1()
{
	const u8 idx = DataReadU8();
	const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s16>(Common::swap16(src[0]))) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Indexed texcoord (u8 index, two s16 components): each source value at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is byte-swapped with
// Common::swap16, cast to s16, converted to float and multiplied by
// tcScale[tcIndex]. Advances the output pointer by 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_Short2()
{
	const u8 idx = DataReadU8();
	const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s16>(Common::swap16(src[0]))) * tcScale[tcIndex];
	dst[1] = static_cast<float>(static_cast<s16>(Common::swap16(src[1]))) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Indexed texcoord (8-bit index, one 32-bit component): the source word at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is byte-swapped with
// Common::swap32 and stored unscaled. Advances the output pointer by 4 bytes
// and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_Float1()
{
	// The index comes from DataReadU8() but is held in a u16 local, as in the original.
	const u16 idx = DataReadU8();
	const u32* src = reinterpret_cast<const u32*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	u32* const dst = reinterpret_cast<u32*>(VertexManager::s_pCurBufferPointer);
	dst[0] = Common::swap32(src[0]);
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Indexed texcoord (8-bit index, two 32-bit components): each source word at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is byte-swapped with
// Common::swap32 and stored unscaled. Advances the output pointer by 8 bytes
// and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex8_Float2()
{
	// The index comes from DataReadU8() but is held in a u16 local, as in the original.
	const u16 idx = DataReadU8();
	const u32* src = reinterpret_cast<const u32*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	u32* const dst = reinterpret_cast<u32*>(VertexManager::s_pCurBufferPointer);
	dst[0] = Common::swap32(src[0]);
	dst[1] = Common::swap32(src[1]);
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// ==================================================================================
|
||||
// Indexed texcoord (u16 index, one u8 component): the source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is converted to
// float and multiplied by tcScale[tcIndex]. Advances the output pointer by
// 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex16_UByte1()
{
	const u16 idx = DataReadU16();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(src[0]) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Indexed texcoord (u16 index, two u8 components): each source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is converted to
// float and multiplied by tcScale[tcIndex]. Advances the output pointer by
// 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex16_UByte2()
{
	const u16 idx = DataReadU16();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(src[0]) * tcScale[tcIndex];
	dst[1] = static_cast<float>(src[1]) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Indexed texcoord (u16 index, one s8 component): the source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is reinterpreted as
// s8, converted to float and multiplied by tcScale[tcIndex]. Advances the
// output pointer by 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex16_Byte1()
{
	const u16 idx = DataReadU16();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s8>(src[0])) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Indexed texcoord (u16 index, two s8 components): each source byte at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is reinterpreted as
// s8, converted to float and multiplied by tcScale[tcIndex]. Advances the
// output pointer by 8 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex16_Byte2()
{
	const u16 idx = DataReadU16();
	const u8* src = cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]);
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<s8>(src[0])) * tcScale[tcIndex];
	dst[1] = static_cast<float>(static_cast<s8>(src[1])) * tcScale[tcIndex];
	LOG_TEX2();
	VertexManager::s_pCurBufferPointer += 8;
	++tcIndex;
}
|
||||
|
||||
// Indexed texcoord (u16 index, one u16 component): the source value at
// base + index * stride of array ARRAY_TEXCOORD0 + tcIndex is byte-swapped with
// Common::swap16, converted to float and multiplied by tcScale[tcIndex].
// Advances the output pointer by 4 bytes and increments tcIndex.
void LOADERDECL TexCoord_ReadIndex16_UShort1()
{
	const u16 idx = DataReadU16();
	const u16* src = reinterpret_cast<const u16*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (idx * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	float* const dst = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	dst[0] = static_cast<float>(static_cast<u16>(Common::swap16(src[0]))) * tcScale[tcIndex];
	LOG_TEX1();
	VertexManager::s_pCurBufferPointer += 4;
	++tcIndex;
}
|
||||
// Texcoord loader: 16-bit index, two unsigned 16-bit components.
// Each source halfword is byte-swapped with Common::swap16, converted to
// float, scaled by tcScale[tcIndex] and written at the vertex write cursor,
// which advances by 8 bytes. tcIndex is incremented for the next slot.
void LOADERDECL TexCoord_ReadIndex16_UShort2()
{
	const u16 element = DataReadU16();
	const u16* const src = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
		+ (element * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	float* const out = (float*)VertexManager::s_pCurBufferPointer;

	out[0] = (float)(u16)Common::swap16(src[0]) * tcScale[tcIndex];
	out[1] = (float)(u16)Common::swap16(src[1]) * tcScale[tcIndex];
	LOG_TEX2();

	VertexManager::s_pCurBufferPointer += 8;
	tcIndex++;
}
|
||||
|
||||
// Texcoord loader: 16-bit index, one signed 16-bit component.
// The source halfword is byte-swapped with Common::swap16 and reinterpreted
// as s16 before the float conversion, then scaled by tcScale[tcIndex] and
// written at the vertex write cursor, which advances by 4 bytes.
// tcIndex is incremented for the next texcoord slot.
void LOADERDECL TexCoord_ReadIndex16_Short1()
{
	const u16 element = DataReadU16();
	const u16* const src = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
		+ (element * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	float* const out = (float*)VertexManager::s_pCurBufferPointer;

	out[0] = (float)(s16)Common::swap16(src[0]) * tcScale[tcIndex];
	LOG_TEX1();

	VertexManager::s_pCurBufferPointer += 4;
	tcIndex++;
}
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex16_Short2()
|
||||
{
|
||||
// Heavy in ZWW
|
||||
u16 Index = DataReadU16();
|
||||
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
|
||||
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex];
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
tcIndex++;
|
||||
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
||||
|
||||
auto const index = DataRead<I>();
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
|
||||
+ (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
|
||||
|
||||
for (int i = 0; i != N; ++i)
|
||||
DataWrite(TCScale(Common::FromBigEndian(data[i])));
|
||||
|
||||
LOG_TEX<N>();
|
||||
++tcIndex;
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
static const __m128i kMaskSwap16_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x02030001L);
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4()
|
||||
template <typename I>
|
||||
void LOADERDECL TexCoord_ReadIndex_Short2_SSE4()
|
||||
{
|
||||
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
||||
|
||||
// Heavy in ZWW
|
||||
u16 Index = DataReadU16();
|
||||
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
auto const index = DataRead<I>();
|
||||
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
const __m128i a = _mm_cvtsi32_si128(*pData);
|
||||
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
|
||||
const __m128i c = _mm_cvtepi16_epi32(b);
|
||||
@ -319,47 +108,27 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4()
|
||||
const __m128 e = _mm_load1_ps(&tcScale[tcIndex]);
|
||||
const __m128 f = _mm_mul_ps(d, e);
|
||||
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f);
|
||||
LOG_TEX2();
|
||||
VertexManager::s_pCurBufferPointer += 8;
|
||||
VertexManager::s_pCurBufferPointer += sizeof(float) * 2;
|
||||
LOG_TEX<2>();
|
||||
tcIndex++;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Texcoord loader: 16-bit index, one 32-bit float component.
// The value is moved as a raw 32-bit word: it is byte-swapped with
// Common::swap32 and stored unscaled at the vertex write cursor, which
// advances by 4 bytes. tcIndex is incremented for the next texcoord slot.
void LOADERDECL TexCoord_ReadIndex16_Float1()
{
	const u16 element = DataReadU16();
	const u32* const src = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
		+ (element * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	u32* const out = (u32*)VertexManager::s_pCurBufferPointer;

	out[0] = Common::swap32(src[0]);
	LOG_TEX1();

	VertexManager::s_pCurBufferPointer += 4;
	tcIndex++;
}
|
||||
|
||||
// Texcoord loader: 16-bit index, two 32-bit float components.
// Both values are moved as raw 32-bit words: each is byte-swapped with
// Common::swap32 and stored unscaled at the vertex write cursor, which
// advances by 8 bytes. tcIndex is incremented for the next texcoord slot.
void LOADERDECL TexCoord_ReadIndex16_Float2()
{
	const u16 element = DataReadU16();
	const u32* const src = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
		+ (element * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
	u32* const out = (u32*)VertexManager::s_pCurBufferPointer;

	out[0] = Common::swap32(src[0]);
	out[1] = Common::swap32(src[1]);
	LOG_TEX2();

	VertexManager::s_pCurBufferPointer += 8;
	tcIndex++;
}
|
||||
|
||||
#if _M_SSE >= 0x301
|
||||
static const __m128i kMaskSwap32 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
|
||||
|
||||
void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3()
|
||||
template <typename I>
|
||||
void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3()
|
||||
{
|
||||
u16 Index = DataReadU16();
|
||||
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
||||
|
||||
auto const index = DataRead<I>();
|
||||
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
|
||||
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
|
||||
u8* p = VertexManager::s_pCurBufferPointer;
|
||||
_mm_storel_epi64((__m128i*)p, b);
|
||||
LOG_TEX2();
|
||||
p += 8;
|
||||
VertexManager::s_pCurBufferPointer = p;
|
||||
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||
VertexManager::s_pCurBufferPointer += sizeof(float) * 2;
|
||||
LOG_TEX<2>();
|
||||
tcIndex++;
|
||||
}
|
||||
#endif
|
||||
@ -373,56 +142,40 @@ static TPipelineFunction tableReadTexCoord[4][8][2] = {
|
||||
{NULL, NULL,},
|
||||
},
|
||||
{
|
||||
{TexCoord_ReadDirect_UByte1, TexCoord_ReadDirect_UByte2,},
|
||||
{TexCoord_ReadDirect_Byte1, TexCoord_ReadDirect_Byte2,},
|
||||
{TexCoord_ReadDirect_UShort1, TexCoord_ReadDirect_UShort2,},
|
||||
{TexCoord_ReadDirect_Short1, TexCoord_ReadDirect_Short2,},
|
||||
{TexCoord_ReadDirect_Float1, TexCoord_ReadDirect_Float2,},
|
||||
{TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>,},
|
||||
{TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>,},
|
||||
{TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>,},
|
||||
{TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>,},
|
||||
{TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>,},
|
||||
},
|
||||
{
|
||||
{TexCoord_ReadIndex8_UByte1, TexCoord_ReadIndex8_UByte2,},
|
||||
{TexCoord_ReadIndex8_Byte1, TexCoord_ReadIndex8_Byte2,},
|
||||
{TexCoord_ReadIndex8_UShort1, TexCoord_ReadIndex8_UShort2,},
|
||||
{TexCoord_ReadIndex8_Short1, TexCoord_ReadIndex8_Short2,},
|
||||
{TexCoord_ReadIndex8_Float1, TexCoord_ReadIndex8_Float2,},
|
||||
{TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>,},
|
||||
{TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>,},
|
||||
{TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>,},
|
||||
{TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>,},
|
||||
{TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>,},
|
||||
},
|
||||
{
|
||||
{TexCoord_ReadIndex16_UByte1, TexCoord_ReadIndex16_UByte2,},
|
||||
{TexCoord_ReadIndex16_Byte1, TexCoord_ReadIndex16_Byte2,},
|
||||
{TexCoord_ReadIndex16_UShort1, TexCoord_ReadIndex16_UShort2,},
|
||||
{TexCoord_ReadIndex16_Short1, TexCoord_ReadIndex16_Short2,},
|
||||
{TexCoord_ReadIndex16_Float1, TexCoord_ReadIndex16_Float2,},
|
||||
{TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>,},
|
||||
{TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>,},
|
||||
{TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>,},
|
||||
{TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>,},
|
||||
{TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>,},
|
||||
},
|
||||
};
|
||||
|
||||
static int tableReadTexCoordVertexSize[4][8][2] = {
|
||||
{
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,},
|
||||
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
|
||||
},
|
||||
{
|
||||
{1, 2,},
|
||||
{1, 2,},
|
||||
{2, 4,},
|
||||
{2, 4,},
|
||||
{4, 8,},
|
||||
{1, 2,}, {1, 2,}, {2, 4,}, {2, 4,}, {4, 8,},
|
||||
},
|
||||
{
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,},
|
||||
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
|
||||
},
|
||||
{
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,},
|
||||
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
|
||||
},
|
||||
};
|
||||
|
||||
@ -430,16 +183,20 @@ void VertexLoader_TextCoord::Init(void) {
|
||||
|
||||
#if _M_SSE >= 0x301
|
||||
|
||||
if (cpu_info.bSSSE3) {
|
||||
tableReadTexCoord[3][4][1] = TexCoord_ReadIndex16_Float2_SSSE3;
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
tableReadTexCoord[2][4][1] = TexCoord_ReadIndex_Float2_SSSE3<u8>;
|
||||
tableReadTexCoord[3][4][1] = TexCoord_ReadIndex_Float2_SSSE3<u16>;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
if (cpu_info.bSSE4_1) {
|
||||
tableReadTexCoord[3][3][1] = TexCoord_ReadIndex16_Short2_SSE4;
|
||||
if (cpu_info.bSSE4_1)
|
||||
{
|
||||
tableReadTexCoord[2][3][1] = TexCoord_ReadIndex_Short2_SSE4<u8>;
|
||||
tableReadTexCoord[3][3][1] = TexCoord_ReadIndex_Short2_SSE4<u16>;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -12,174 +12,120 @@
|
||||
#include "BPStructs.h"
|
||||
|
||||
#include "VertexManagerBase.h"
|
||||
#include "MainBase.h"
|
||||
#include "VideoConfig.h"
|
||||
|
||||
VertexManager *g_vertex_manager;
|
||||
|
||||
u8 *VertexManager::s_pCurBufferPointer;
|
||||
u8 *VertexManager::s_pBaseBufferPointer;
|
||||
|
||||
u8 *VertexManager::LocalVBuffer;
|
||||
u16 *VertexManager::TIBuffer;
|
||||
u16 *VertexManager::LIBuffer;
|
||||
u16 *VertexManager::PIBuffer;
|
||||
|
||||
bool VertexManager::Flushed;
|
||||
u8 *VertexManager::s_pEndBufferPointer;
|
||||
|
||||
VertexManager::VertexManager()
|
||||
{
|
||||
Flushed = false;
|
||||
LocalVBuffer.resize(MAXVBUFFERSIZE);
|
||||
s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0];
|
||||
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
|
||||
|
||||
LocalVBuffer = new u8[MAXVBUFFERSIZE];
|
||||
s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer;
|
||||
TIBuffer.resize(MAXIBUFFERSIZE);
|
||||
LIBuffer.resize(MAXIBUFFERSIZE);
|
||||
PIBuffer.resize(MAXIBUFFERSIZE);
|
||||
|
||||
TIBuffer = new u16[MAXIBUFFERSIZE];
|
||||
LIBuffer = new u16[MAXIBUFFERSIZE];
|
||||
PIBuffer = new u16[MAXIBUFFERSIZE];
|
||||
|
||||
IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
|
||||
}
|
||||
|
||||
// Rewinds the vertex write cursor to the start of LocalVBuffer.
void VertexManager::ResetBuffer()
|
||||
{
|
||||
s_pCurBufferPointer = LocalVBuffer;
|
||||
}
|
||||
|
||||
// Releases the vertex buffer and the three index buffers with delete[],
// then calls ResetBuffer().
VertexManager::~VertexManager()
|
||||
{
|
||||
delete[] LocalVBuffer;
|
||||
|
||||
delete[] TIBuffer;
|
||||
delete[] LIBuffer;
|
||||
delete[] PIBuffer;
|
||||
|
||||
// TODO: necessary??
// NOTE(review): if ResetBuffer() assigns LocalVBuffer to s_pCurBufferPointer,
// the cursor ends up pointing at storage that was just freed -- confirm.
|
||||
ResetBuffer();
|
||||
}
|
||||
|
||||
void VertexManager::AddIndices(int primitive, int numVertices)
|
||||
{
|
||||
//switch (primitive)
|
||||
//{
|
||||
//case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break;
|
||||
//case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break;
|
||||
//case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break;
|
||||
//case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break;
|
||||
//case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break;
|
||||
//case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break;
|
||||
//case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break;
|
||||
//}
|
||||
VertexManager::~VertexManager()
|
||||
{}
|
||||
|
||||
static void (*const primitive_table[])(int) =
|
||||
void VertexManager::ResetBuffer()
|
||||
{
|
||||
s_pCurBufferPointer = s_pBaseBufferPointer;
|
||||
IndexGenerator::Start(GetTriangleIndexBuffer(), GetLineIndexBuffer(), GetPointIndexBuffer());
|
||||
}
|
||||
|
||||
// Returns the distance in bytes from the current write cursor to
// s_pEndBufferPointer, converted to u32.
u32 VertexManager::GetRemainingSize()
{
	auto const bytes_left = s_pEndBufferPointer - s_pCurBufferPointer;
	return (u32)bytes_left;
}
|
||||
|
||||
void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
|
||||
{
|
||||
u32 const needed_vertex_bytes = count * stride;
|
||||
|
||||
if (needed_vertex_bytes > GetRemainingSize() || count > GetRemainingIndices(primitive))
|
||||
{
|
||||
IndexGenerator::AddQuads,
|
||||
NULL,
|
||||
IndexGenerator::AddList,
|
||||
IndexGenerator::AddStrip,
|
||||
IndexGenerator::AddFan,
|
||||
IndexGenerator::AddLineList,
|
||||
IndexGenerator::AddLineStrip,
|
||||
IndexGenerator::AddPoints,
|
||||
};
|
||||
|
||||
primitive_table[primitive](numVertices);
|
||||
Flush();
|
||||
|
||||
if (needed_vertex_bytes > GetRemainingSize())
|
||||
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
|
||||
"Increase MAXVBUFFERSIZE or we need primitive breaking afterall.");
|
||||
if (count > GetRemainingIndices(primitive))
|
||||
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
|
||||
"Increase MAXIBUFFERSIZE or we need primitive breaking afterall.");
|
||||
}
|
||||
}
|
||||
|
||||
int VertexManager::GetRemainingSize()
|
||||
bool VertexManager::IsFlushed() const
|
||||
{
|
||||
return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer);
|
||||
return s_pBaseBufferPointer == s_pCurBufferPointer;
|
||||
}
|
||||
|
||||
int VertexManager::GetRemainingVertices(int primitive)
|
||||
u32 VertexManager::GetRemainingIndices(int primitive)
|
||||
{
|
||||
switch (primitive)
|
||||
{
|
||||
case GX_DRAW_QUADS:
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 6 * 4;
|
||||
case GX_DRAW_TRIANGLES:
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen());
|
||||
case GX_DRAW_TRIANGLE_STRIP:
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;
|
||||
case GX_DRAW_TRIANGLE_FAN:
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3;
|
||||
break;
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;
|
||||
|
||||
case GX_DRAW_LINES:
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen());
|
||||
case GX_DRAW_LINE_STRIP:
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2;
|
||||
break;
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2 + 1;
|
||||
|
||||
case GX_DRAW_POINTS:
|
||||
return (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen());
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexManager::AddVertices(int primitive, int numVertices)
|
||||
void VertexManager::AddVertices(int primitive, u32 numVertices)
|
||||
{
|
||||
if (numVertices <= 0)
|
||||
return;
|
||||
|
||||
switch (primitive)
|
||||
{
|
||||
case GX_DRAW_QUADS:
|
||||
case GX_DRAW_TRIANGLES:
|
||||
case GX_DRAW_TRIANGLE_STRIP:
|
||||
case GX_DRAW_TRIANGLE_FAN:
|
||||
if (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen() < 3 * numVertices)
|
||||
Flush();
|
||||
break;
|
||||
|
||||
case GX_DRAW_LINES:
|
||||
case GX_DRAW_LINE_STRIP:
|
||||
if (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen() < 2 * numVertices)
|
||||
Flush();
|
||||
break;
|
||||
|
||||
case GX_DRAW_POINTS:
|
||||
if (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen() < numVertices)
|
||||
Flush();
|
||||
break;
|
||||
|
||||
default:
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
if (Flushed)
|
||||
{
|
||||
IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
|
||||
Flushed = false;
|
||||
}
|
||||
|
||||
ADDSTAT(stats.thisFrame.numPrims, numVertices);
|
||||
INCSTAT(stats.thisFrame.numPrimitiveJoins);
|
||||
AddIndices(primitive, numVertices);
|
||||
|
||||
IndexGenerator::AddIndices(primitive, numVertices);
|
||||
}
|
||||
|
||||
void VertexManager::Flush()
|
||||
{
|
||||
if (g_vertex_manager->IsFlushed())
|
||||
return;
|
||||
|
||||
// loading a state will invalidate BP, so check for it
|
||||
g_video_backend->CheckInvalidState();
|
||||
|
||||
if (LocalVBuffer == s_pCurBufferPointer) return;
|
||||
if (Flushed) return;
|
||||
Flushed = true;
|
||||
VideoFifo_CheckEFBAccess();
|
||||
|
||||
g_vertex_manager->vFlush();
|
||||
|
||||
g_vertex_manager->ResetBuffer();
|
||||
}
|
||||
|
||||
// TODO: need to merge more stuff into VideoCommon to use this
|
||||
#if (0)
|
||||
void VertexManager::Flush()
|
||||
{
|
||||
if (LocalVBuffer == s_pCurBufferPointer || Flushed)
|
||||
return;
|
||||
|
||||
Flushed = true;
|
||||
|
||||
VideoFifo_CheckEFBAccess();
|
||||
|
||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens,
|
||||
xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op,
|
||||
@ -252,21 +198,23 @@ void VertexManager::Flush()
|
||||
|
||||
// finally bind
|
||||
if (false == PixelShaderCache::SetShader(false, g_nativeVertexFmt->m_components))
|
||||
goto shader_fail;
|
||||
return;
|
||||
if (false == VertexShaderCache::SetShader(g_nativeVertexFmt->m_components))
|
||||
goto shader_fail;
|
||||
return;
|
||||
|
||||
const int stride = g_nativeVertexFmt->GetVertexStride();
|
||||
//if (g_nativeVertexFmt)
|
||||
g_nativeVertexFmt->SetupVertexPointers();
|
||||
|
||||
g_renderer->ResumePixelPerf(false);
|
||||
g_vertex_manager->Draw(stride, false);
|
||||
g_renderer->PausePixelPerf(false);
|
||||
|
||||
// run through vertex groups again to set alpha
|
||||
if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
|
||||
{
|
||||
if (false == PixelShaderCache::SetShader(true, g_nativeVertexFmt->m_components))
|
||||
goto shader_fail;
|
||||
return;
|
||||
|
||||
g_vertex_manager->Draw(stride, true);
|
||||
}
|
||||
@ -280,10 +228,12 @@ void VertexManager::Flush()
|
||||
// save the shaders
|
||||
char strfile[255];
|
||||
sprintf(strfile, "%sps%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), g_ActiveConfig.iSaveTargetId);
|
||||
std::ofstream fps(strfile);
|
||||
std::ofstream fps;
|
||||
OpenFStream(fps, strfile, std::ios_base::out);
|
||||
fps << ps->strprog.c_str();
|
||||
sprintf(strfile, "%svs%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), g_ActiveConfig.iSaveTargetId);
|
||||
std::ofstream fvs(strfile);
|
||||
std::ofstream fvs;
|
||||
OpenFStream(fvs, strfile, std::ios_base::out);
|
||||
fvs << vs->strprog.c_str();
|
||||
}
|
||||
|
||||
@ -300,9 +250,6 @@ void VertexManager::Flush()
|
||||
}
|
||||
#endif
|
||||
++g_Config.iSaveTargetId;
|
||||
|
||||
shader_fail:
|
||||
ResetBuffer();
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -313,12 +260,16 @@ void VertexManager::DoState(PointerWrap& p)
|
||||
|
||||
void VertexManager::DoStateShared(PointerWrap& p)
|
||||
{
|
||||
p.DoPointer(s_pCurBufferPointer, LocalVBuffer);
|
||||
p.DoArray(LocalVBuffer, MAXVBUFFERSIZE);
|
||||
p.DoArray(TIBuffer, MAXIBUFFERSIZE);
|
||||
p.DoArray(LIBuffer, MAXIBUFFERSIZE);
|
||||
p.DoArray(PIBuffer, MAXIBUFFERSIZE);
|
||||
|
||||
if (p.GetMode() == PointerWrap::MODE_READ)
|
||||
Flushed = false;
|
||||
// It seems we half-assume to be flushed here
|
||||
// We update s_pCurBufferPointer yet don't worry about IndexGenerator's outdated pointers
|
||||
// and maybe other things are overlooked
|
||||
|
||||
p.Do(LocalVBuffer);
|
||||
p.Do(TIBuffer);
|
||||
p.Do(LIBuffer);
|
||||
p.Do(PIBuffer);
|
||||
|
||||
s_pBaseBufferPointer = &LocalVBuffer[0];
|
||||
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
|
||||
p.DoPointer(s_pCurBufferPointer, s_pBaseBufferPointer);
|
||||
}
|
||||
|
@ -2,72 +2,70 @@
|
||||
#ifndef _VERTEXMANAGERBASE_H
|
||||
#define _VERTEXMANAGERBASE_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
class NativeVertexFormat;
|
||||
class PointerWrap;
|
||||
|
||||
class VertexManager
|
||||
{
|
||||
private:
|
||||
// What are the actual values?
|
||||
static const u32 SMALLEST_POSSIBLE_VERTEX = 1;
|
||||
static const u32 LARGEST_POSSIBLE_VERTEX = 188;
|
||||
|
||||
static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1;
|
||||
|
||||
public:
|
||||
|
||||
enum
|
||||
{
|
||||
// values from OGL backend
|
||||
//MAXVBUFFERSIZE = 0x1FFFF,
|
||||
//MAXIBUFFERSIZE = 0xFFFF,
|
||||
|
||||
// values from DX9 backend
|
||||
//MAXVBUFFERSIZE = 0x50000,
|
||||
//MAXIBUFFERSIZE = 0xFFFF,
|
||||
|
||||
// values from DX11 backend
|
||||
MAXVBUFFERSIZE = 0x50000,
|
||||
MAXIBUFFERSIZE = 0xFFFF,
|
||||
};
|
||||
static const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX;
|
||||
|
||||
// We may convert triangle-fans to triangle-lists, almost 3x as many indices.
|
||||
static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3;
|
||||
|
||||
VertexManager();
|
||||
virtual ~VertexManager(); // needs to be virtual for DX11's dtor
|
||||
// needs to be virtual for DX11's dtor
|
||||
virtual ~VertexManager();
|
||||
|
||||
static void AddVertices(int _primitive, int _numVertices);
|
||||
static void AddVertices(int _primitive, u32 _numVertices);
|
||||
|
||||
// TODO: protected?
|
||||
static u8 *s_pCurBufferPointer;
|
||||
static u8 *s_pBaseBufferPointer;
|
||||
static u8 *s_pEndBufferPointer;
|
||||
|
||||
static int GetRemainingSize();
|
||||
static int GetRemainingVertices(int primitive);
|
||||
static u32 GetRemainingSize();
|
||||
static void PrepareForAdditionalData(int primitive, u32 count, u32 stride);
|
||||
static u32 GetRemainingIndices(int primitive);
|
||||
|
||||
static void Flush();
|
||||
|
||||
virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0;
|
||||
|
||||
static u16* GetTriangleIndexBuffer() { return TIBuffer; }
|
||||
static u16* GetLineIndexBuffer() { return LIBuffer; }
|
||||
static u16* GetPointIndexBuffer() { return PIBuffer; }
|
||||
static u8* GetVertexBuffer() { return LocalVBuffer; }
|
||||
|
||||
static void DoState(PointerWrap& p);
|
||||
virtual void CreateDeviceObjects(){};
|
||||
virtual void DestroyDeviceObjects(){};
|
||||
|
||||
protected:
|
||||
// TODO: make private after Flush() is merged
|
||||
static void ResetBuffer();
|
||||
|
||||
static u8 *LocalVBuffer;
|
||||
static u16 *TIBuffer;
|
||||
static u16 *LIBuffer;
|
||||
static u16 *PIBuffer;
|
||||
|
||||
static bool Flushed;
|
||||
u16* GetTriangleIndexBuffer() { return &TIBuffer[0]; }
|
||||
u16* GetLineIndexBuffer() { return &LIBuffer[0]; }
|
||||
u16* GetPointIndexBuffer() { return &PIBuffer[0]; }
|
||||
u8* GetVertexBuffer() { return &s_pBaseBufferPointer[0]; }
|
||||
|
||||
virtual void vDoState(PointerWrap& p) { DoStateShared(p); }
|
||||
void DoStateShared(PointerWrap& p);
|
||||
|
||||
private:
|
||||
static void AddIndices(int primitive, int numVertices);
|
||||
bool IsFlushed() const;
|
||||
|
||||
void ResetBuffer();
|
||||
|
||||
//virtual void Draw(u32 stride, bool alphapass) = 0;
|
||||
// temp
|
||||
virtual void vFlush() = 0;
|
||||
|
||||
|
||||
std::vector<u8> LocalVBuffer;
|
||||
std::vector<u16> TIBuffer;
|
||||
std::vector<u16> LIBuffer;
|
||||
std::vector<u16> PIBuffer;
|
||||
};
|
||||
|
||||
extern VertexManager *g_vertex_manager;
|
||||
|
@ -114,7 +114,8 @@ void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std
|
||||
static int num_failures = 0;
|
||||
char szTemp[MAX_PATH];
|
||||
sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
|
||||
std::ofstream file(szTemp);
|
||||
std::ofstream file;
|
||||
OpenFStream(file, szTemp, std::ios_base::out);
|
||||
file << msg;
|
||||
file << "\n\nOld shader code:\n" << old_code;
|
||||
file << "\n\nNew shader code:\n" << new_code;
|
||||
@ -515,7 +516,6 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
|
||||
|
||||
//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
|
||||
//if not early z culling will improve speed
|
||||
// TODO: Can probably be dropped?
|
||||
if (is_d3d)
|
||||
{
|
||||
WRITE(p, "o.pos.z = " I_DEPTHPARAMS".x * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y;\n");
|
||||
|
@ -90,8 +90,8 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
|
||||
#define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
|
||||
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]);
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
|
||||
#define LOG_VTX()
|
||||
|
||||
|
@ -42,6 +42,7 @@ VideoConfig::VideoConfig()
|
||||
// disable all features by default
|
||||
backend_info.APIType = API_NONE;
|
||||
backend_info.bUseRGBATextures = false;
|
||||
backend_info.bUseMinimalMipCount = false;
|
||||
backend_info.bSupports3DVision = false;
|
||||
}
|
||||
|
||||
|
@ -115,7 +115,7 @@ struct VideoConfig
|
||||
int iAnaglyphStereoSeparation;
|
||||
int iAnaglyphFocalAngle;
|
||||
bool b3DVision;
|
||||
|
||||
|
||||
// Hacks
|
||||
bool bEFBAccessEnable;
|
||||
bool bDlistCachingEnable;
|
||||
@ -158,6 +158,7 @@ struct VideoConfig
|
||||
std::vector<std::string> PPShaders; // post-processing shaders
|
||||
|
||||
bool bUseRGBATextures; // used for D3D11 in TextureCache
|
||||
bool bUseMinimalMipCount;
|
||||
bool bSupports3DVision;
|
||||
bool bSupportsDualSourceBlend; // only supported by D3D11 and OpenGL
|
||||
bool bSupportsFormatReinterpretation;
|
||||
|
@ -35,7 +35,7 @@
|
||||
#include "VertexLoaderManager.h"
|
||||
#include "VertexManagerBase.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
#include "x64ABI.h"
|
||||
|
||||
#include "DLCache.h"
|
||||
#include "VideoConfig.h"
|
||||
@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
|
||||
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||
numVertices);
|
||||
u8* EndAddress = VertexManager::s_pCurBufferPointer;
|
||||
u32 Vdatasize = (u32)(EndAddress - StartAddress);
|
||||
u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress);
|
||||
if (Vdatasize > 0)
|
||||
{
|
||||
// Compile
|
@ -1119,20 +1119,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
||||
_mm_storeu_si128( (__m128i*)( dst+(y + iy+1) * width + x + 4 ), o4 );
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// Reference C implementation:
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0; x < width; x += 8)
|
||||
for (int iy = 0; iy < 8; iy++, src += 4)
|
||||
for (int ix = 0; ix < 4; ix++)
|
||||
{
|
||||
int val = src[ix];
|
||||
u8 i1 = Convert4To8(val >> 4);
|
||||
u8 i2 = Convert4To8(val & 0xF);
|
||||
memset(dst+(y + iy) * width + x + ix * 2 , i1,4);
|
||||
memset(dst+(y + iy) * width + x + ix * 2 + 1 , i2,4);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case GX_TF_I8: // speed critical
|
||||
@ -1248,26 +1234,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
||||
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// Reference C implementation
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0; x < width; x += 8)
|
||||
for (int iy = 0; iy < 4; ++iy, src += 8)
|
||||
{
|
||||
u32 * newdst = dst + (y + iy)*width+x;
|
||||
const u8 * newsrc = src;
|
||||
u8 srcval;
|
||||
|
||||
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
srcval = newsrc[0]; newdst[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case GX_TF_C8:
|
||||
@ -1380,20 +1346,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
||||
_mm_storeu_si128( (__m128i*)(dst + (y + iy) * width + x), r1 );
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// Reference C implementation:
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0; x < width; x += 4)
|
||||
for (int iy = 0; iy < 4; iy++, src += 8)
|
||||
{
|
||||
u32 *ptr = dst + (y + iy) * width + x;
|
||||
u16 *s = (u16 *)src;
|
||||
ptr[0] = decodeIA8Swapped(s[0]);
|
||||
ptr[1] = decodeIA8Swapped(s[1]);
|
||||
ptr[2] = decodeIA8Swapped(s[2]);
|
||||
ptr[3] = decodeIA8Swapped(s[3]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case GX_TF_C14X2:
|
||||
@ -1493,18 +1445,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
||||
__m128i *ptr = (__m128i *)(dst + (y + iy) * width + x);
|
||||
_mm_storeu_si128(ptr, abgr888x4);
|
||||
}
|
||||
#if 0
|
||||
// Reference C implementation.
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0; x < width; x += 4)
|
||||
for (int iy = 0; iy < 4; iy++, src += 8)
|
||||
{
|
||||
u32 *ptr = dst + (y + iy) * width + x;
|
||||
u16 *s = (u16 *)src;
|
||||
for(int j = 0; j < 4; j++)
|
||||
*ptr++ = decode565RGBA(Common::swap16(*s++));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case GX_TF_RGB5A3:
|
||||
@ -1718,13 +1658,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
||||
}
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// Reference C implementation:
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0; x < width; x += 4)
|
||||
for (int iy = 0; iy < 4; iy++, src += 8)
|
||||
decodebytesRGB5A3rgba(dst+(y+iy)*width+x, (u16*)src);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case GX_TF_RGBA8: // speed critical
|
||||
@ -1860,16 +1793,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
||||
_mm_storeu_si128(dst128, rgba11);
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// Reference C implementation.
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0; x < width; x += 4)
|
||||
{
|
||||
for (int iy = 0; iy < 4; iy++)
|
||||
decodebytesARGB8_4ToRgba(dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16);
|
||||
src += 64;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case GX_TF_CMPR: // speed critical
|
||||
@ -2104,22 +2027,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
||||
}
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
for (int y = 0; y < height; y += 8)
|
||||
{
|
||||
for (int x = 0; x < width; x += 8)
|
||||
{
|
||||
decodeDXTBlockRGBA((u32*)dst + y * width + x, (DXTBlock*)src, width);
|
||||
src += sizeof(DXTBlock);
|
||||
decodeDXTBlockRGBA((u32*)dst + y * width + x + 4, (DXTBlock*)src, width);
|
||||
src += sizeof(DXTBlock);
|
||||
decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x, (DXTBlock*)src, width);
|
||||
src += sizeof(DXTBlock);
|
||||
decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src, width);
|
||||
src += sizeof(DXTBlock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
@ -111,8 +111,8 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>..\Common\Src;..\Core\Src;..\..\..\Externals\SOIL;..\..\..\Externals\CLRun\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
</ClCompile>
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
@ -143,7 +143,7 @@
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>..\Common\Src;..\Core\Src;..\..\..\Externals\SOIL;..\..\..\Externals\CLRun\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
</ClCompile>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
@ -182,7 +182,6 @@
|
||||
<ClCompile Include="Src\CommandProcessor.cpp" />
|
||||
<ClCompile Include="Src\CPMemory.cpp" />
|
||||
<ClCompile Include="Src\Debugger.cpp" />
|
||||
<ClCompile Include="Src\DLCache.cpp" />
|
||||
<ClCompile Include="Src\EmuWindow.cpp" />
|
||||
<ClCompile Include="Src\Fifo.cpp" />
|
||||
<ClCompile Include="Src\FPSCounter.cpp" />
|
||||
@ -197,6 +196,7 @@
|
||||
<ClCompile Include="Src\OpcodeDecoding.cpp" />
|
||||
<ClCompile Include="Src\OpenCL.cpp" />
|
||||
<ClCompile Include="Src\OpenCL\OCLTextureDecoder.cpp" />
|
||||
<ClCompile Include="Src\PerfQueryBase.cpp" />
|
||||
<ClCompile Include="Src\PixelEngine.cpp" />
|
||||
<ClCompile Include="Src\PixelShaderGen.cpp" />
|
||||
<ClCompile Include="Src\PixelShaderManager.cpp" />
|
||||
@ -204,7 +204,6 @@
|
||||
<ClCompile Include="Src\Statistics.cpp" />
|
||||
<ClCompile Include="Src\TextureCacheBase.cpp" />
|
||||
<ClCompile Include="Src\TextureConversionShader.cpp" />
|
||||
<ClCompile Include="Src\TextureDecoder.cpp" />
|
||||
<ClCompile Include="Src\VertexLoader.cpp" />
|
||||
<ClCompile Include="Src\VertexLoaderManager.cpp" />
|
||||
<ClCompile Include="Src\VertexLoader_Color.cpp" />
|
||||
@ -216,6 +215,8 @@
|
||||
<ClCompile Include="Src\VertexShaderManager.cpp" />
|
||||
<ClCompile Include="Src\VideoConfig.cpp" />
|
||||
<ClCompile Include="Src\VideoState.cpp" />
|
||||
<ClCompile Include="Src\x64DLCache.cpp" />
|
||||
<ClCompile Include="Src\x64TextureDecoder.cpp" />
|
||||
<ClCompile Include="Src\XFMemory.cpp" />
|
||||
<ClCompile Include="Src\XFStructs.cpp" />
|
||||
</ItemGroup>
|
||||
@ -244,6 +245,7 @@
|
||||
<ClInclude Include="Src\OpcodeDecoding.h" />
|
||||
<ClInclude Include="Src\OpenCL.h" />
|
||||
<ClInclude Include="Src\OpenCL\OCLTextureDecoder.h" />
|
||||
<ClInclude Include="Src\PerfQueryBase.h" />
|
||||
<ClInclude Include="Src\PixelEngine.h" />
|
||||
<ClInclude Include="Src\PixelShaderGen.h" />
|
||||
<ClInclude Include="Src\PixelShaderManager.h" />
|
||||
|
@ -5,9 +5,6 @@
|
||||
<ClCompile Include="Src\memcpy_amd.cpp" />
|
||||
<ClCompile Include="Src\PixelEngine.cpp" />
|
||||
<ClCompile Include="Src\VideoConfig.cpp" />
|
||||
<ClCompile Include="Src\DLCache.cpp">
|
||||
<Filter>Vertex Loading</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\VertexLoader.cpp">
|
||||
<Filter>Vertex Loading</Filter>
|
||||
</ClCompile>
|
||||
@ -92,9 +89,6 @@
|
||||
<ClCompile Include="Src\OpcodeDecoding.cpp">
|
||||
<Filter>Decoding</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\TextureDecoder.cpp">
|
||||
<Filter>Decoding</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\Debugger.cpp">
|
||||
<Filter>Base</Filter>
|
||||
</ClCompile>
|
||||
@ -107,6 +101,9 @@
|
||||
<ClCompile Include="Src\MainBase.cpp">
|
||||
<Filter>Base</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\PerfQueryBase.cpp">
|
||||
<Filter>Base</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\RenderBase.cpp">
|
||||
<Filter>Base</Filter>
|
||||
</ClCompile>
|
||||
@ -122,6 +119,12 @@
|
||||
<ClCompile Include="Src\FPSCounter.cpp">
|
||||
<Filter>Util</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\x64TextureDecoder.cpp">
|
||||
<Filter>Decoding</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\x64DLCache.cpp">
|
||||
<Filter>Vertex Loading</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Src\CommandProcessor.h" />
|
||||
@ -237,6 +240,9 @@
|
||||
<ClInclude Include="Src\MainBase.h">
|
||||
<Filter>Base</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Src\PerfQueryBase.h">
|
||||
<Filter>Base</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Src\RenderBase.h">
|
||||
<Filter>Base</Filter>
|
||||
</ClInclude>
|
||||
@ -285,4 +291,4 @@
|
||||
<UniqueIdentifier>{e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
Reference in New Issue
Block a user