Merge branch 'arm-noglsl'

This commit is contained in:
Ryan Houdek
2013-02-27 02:22:08 +00:00
135 changed files with 9278 additions and 948 deletions

View File

@ -3,7 +3,6 @@ set(SRCS Src/BPFunctions.cpp
Src/BPStructs.cpp
Src/CPMemory.cpp
Src/CommandProcessor.cpp
Src/DLCache.cpp
Src/Debugger.cpp
Src/Fifo.cpp
Src/FPSCounter.cpp
@ -24,7 +23,6 @@ set(SRCS Src/BPFunctions.cpp
Src/Statistics.cpp
Src/TextureCacheBase.cpp
Src/TextureConversionShader.cpp
Src/TextureDecoder.cpp
Src/VertexLoader.cpp
Src/VertexLoaderManager.cpp
Src/VertexLoader_Color.cpp
@ -41,6 +39,14 @@ set(SRCS Src/BPFunctions.cpp
Src/memcpy_amd.cpp)
set(LIBS core)
# Pick the architecture-specific texture decoder and display-list cache:
# the Generic* sources when _M_GENERIC is set, the x64* sources otherwise.
if(_M_GENERIC)
	list(APPEND SRCS Src/GenericTextureDecoder.cpp
		Src/GenericDLCache.cpp)
else()
	list(APPEND SRCS Src/x64TextureDecoder.cpp
		Src/x64DLCache.cpp)
endif()
# Link against the library stored in CL only when it was actually found.
# NOTE(review): CL is presumably a find_library() result - confirm where it
# is set. if(<variable>) is false for undefined, empty and *-NOTFOUND values,
# so unlike the previous unquoted `${CL} STREQUAL CL-NOTFOUND` comparison it
# does not turn into a malformed condition when CL expands to nothing.
if(CL)
	list(APPEND LIBS ${CL})
endif()

View File

@ -222,11 +222,11 @@ void RunGpu()
{
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
SaveSSEState();
LoadDefaultSSEState();
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(uData, 32);
OpcodeDecoder_Run(g_bSkipCurrentFrame);
LoadSSEState();
FPURoundMode::LoadSIMDState();
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");

View File

@ -0,0 +1,52 @@
// Copyright (C) 2003-2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// TODO: Handle cache-is-full condition :p
#include "Common.h"
#include "DLCache.h"
// Display-list cache entry points. Every function in this implementation has
// an empty body, i.e. nothing is ever cached here.
namespace DLCache
{

// No-op: there is no state to set up.
void Init() {}

// No-op: there is no state to tear down.
void Shutdown() {}

// No-op: there are no cached entries to drop.
void Clear() {}

// No-op: there are no cached entries to age out.
void ProgressiveCleanup() {}

}  // namespace
// NOTE - outside the namespace on purpose.
// Stub: ignores both arguments and always reports false.
// NOTE(review): false presumably tells the caller the display list was not
// handled by a cache - confirm against the call sites.
bool HandleDisplayList(u32 address, u32 size)
{
	(void)address;
	(void)size;

	const bool handled = false;
	return handled;
}
// Stub: intentionally does nothing in this implementation.
void IncrementCheckContextId() {}

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,7 @@
#include "MemoryUtil.h"
#include "StringUtil.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "x64ABI.h"
#include "PixelEngine.h"
#include "Host.h"
@ -43,8 +43,9 @@
//BBox
#include "XFMemory.h"
extern float GC_ALIGNED16(g_fProjectionMatrix[16]);
#ifndef _M_GENERIC
#define USE_JIT
#endif
#define COMPILED_CODE_SIZE 4096
@ -82,8 +83,9 @@ static const float fractionTable[32] = {
1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27),
1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31),
};
#ifdef USE_JIT
using namespace Gen;
#endif
void LOADERDECL PosMtx_ReadDirect_UByte()
{
@ -182,14 +184,19 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
m_VtxDesc = vtx_desc;
SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
#ifdef USE_JIT
AllocCodeSpace(COMPILED_CODE_SIZE);
CompileVertexTranslator();
WriteProtect();
#endif
}
// Destructor: frees the code space (only when USE_JIT is defined) and then
// deletes m_NativeFmt.
VertexLoader::~VertexLoader()
{
#ifdef USE_JIT
// Counterpart of the AllocCodeSpace() call the constructor makes under the
// same USE_JIT guard.
FreeCodeSpace();
#endif
delete m_NativeFmt;
}
@ -474,7 +481,8 @@ void VertexLoader::WriteCall(TPipelineFunction func)
m_PipelineStages[m_numPipelineStages++] = func;
#endif
}
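// ARMTODO: The guard below compiles WriteGetVariable/WriteSetVariable out of
// _M_GENERIC builds (their signatures use the emitter's OpArg type). This
// should be done in a better way.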
#ifndef _M_GENERIC
void VertexLoader::WriteGetVariable(int bits, OpArg dest, void *address)
{
#ifdef USE_JIT
@ -498,7 +506,7 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
#endif
#endif
}
#endif
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{
m_numLoadedVertices += count;

View File

@ -76,7 +76,12 @@ private:
}
};
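// ARMTODO: On _M_GENERIC builds VertexLoader is declared without its
// Gen::XCodeBlock base - and, as written, without NonCopyable as well.
// This should be done in a better way.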
#ifndef _M_GENERIC
class VertexLoader : public Gen::XCodeBlock, NonCopyable
#else
class VertexLoader
#endif
{
public:
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
@ -122,8 +127,10 @@ private:
void WriteCall(TPipelineFunction);
#ifndef _M_GENERIC
void WriteGetVariable(int bits, Gen::OpArg dest, void *address);
void WriteSetVariable(int bits, void *address, Gen::OpArg dest);
#endif
};
#endif

View File

@ -35,7 +35,7 @@
#include "VertexLoaderManager.h"
#include "VertexManagerBase.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "x64ABI.h"
#include "DLCache.h"
#include "VideoConfig.h"

View File

@ -1119,20 +1119,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
_mm_storeu_si128( (__m128i*)( dst+(y + iy+1) * width + x + 4 ), o4 );
}
}
#if 0
// Reference C implementation:
for (int y = 0; y < height; y += 8)
for (int x = 0; x < width; x += 8)
for (int iy = 0; iy < 8; iy++, src += 4)
for (int ix = 0; ix < 4; ix++)
{
int val = src[ix];
u8 i1 = Convert4To8(val >> 4);
u8 i2 = Convert4To8(val & 0xF);
memset(dst+(y + iy) * width + x + ix * 2 , i1,4);
memset(dst+(y + iy) * width + x + ix * 2 + 1 , i2,4);
}
#endif
}
break;
case GX_TF_I8: // speed critical
@ -1248,26 +1234,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
}
}
#if 0
// Reference C implementation
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 8)
for (int iy = 0; iy < 4; ++iy, src += 8)
{
u32 * newdst = dst + (y + iy)*width+x;
const u8 * newsrc = src;
u8 srcval;
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = newsrc[0]; newdst[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
}
#endif
}
break;
case GX_TF_C8:
@ -1380,20 +1346,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
_mm_storeu_si128( (__m128i*)(dst + (y + iy) * width + x), r1 );
}
}
#if 0
// Reference C implementation:
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
for (int iy = 0; iy < 4; iy++, src += 8)
{
u32 *ptr = dst + (y + iy) * width + x;
u16 *s = (u16 *)src;
ptr[0] = decodeIA8Swapped(s[0]);
ptr[1] = decodeIA8Swapped(s[1]);
ptr[2] = decodeIA8Swapped(s[2]);
ptr[3] = decodeIA8Swapped(s[3]);
}
#endif
}
break;
case GX_TF_C14X2:
@ -1493,18 +1445,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
__m128i *ptr = (__m128i *)(dst + (y + iy) * width + x);
_mm_storeu_si128(ptr, abgr888x4);
}
#if 0
// Reference C implementation.
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
for (int iy = 0; iy < 4; iy++, src += 8)
{
u32 *ptr = dst + (y + iy) * width + x;
u16 *s = (u16 *)src;
for(int j = 0; j < 4; j++)
*ptr++ = decode565RGBA(Common::swap16(*s++));
}
#endif
}
break;
case GX_TF_RGB5A3:
@ -1718,13 +1658,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
}
}
}
#if 0
// Reference C implementation:
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
for (int iy = 0; iy < 4; iy++, src += 8)
decodebytesRGB5A3rgba(dst+(y+iy)*width+x, (u16*)src);
#endif
}
break;
case GX_TF_RGBA8: // speed critical
@ -1860,16 +1793,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
_mm_storeu_si128(dst128, rgba11);
}
}
#if 0
// Reference C implementation.
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
{
for (int iy = 0; iy < 4; iy++)
decodebytesARGB8_4ToRgba(dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16);
src += 64;
}
#endif
}
break;
case GX_TF_CMPR: // speed critical
@ -2104,22 +2027,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
}
}
}
#if 0
for (int y = 0; y < height; y += 8)
{
for (int x = 0; x < width; x += 8)
{
decodeDXTBlockRGBA((u32*)dst + y * width + x, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
decodeDXTBlockRGBA((u32*)dst + y * width + x + 4, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
}
}
#endif
break;
}
}

View File

@ -111,8 +111,8 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<AdditionalIncludeDirectories>..\Common\Src;..\Core\Src;..\..\..\Externals\SOIL;..\..\..\Externals\CLRun\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<OpenMPSupport>false</OpenMPSupport>
</ClCompile>
<OpenMPSupport>false</OpenMPSupport>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
@ -143,7 +143,7 @@
<ClCompile>
<AdditionalIncludeDirectories>..\Common\Src;..\Core\Src;..\..\..\Externals\SOIL;..\..\..\Externals\CLRun\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<OpenMPSupport>false</OpenMPSupport>
</ClCompile>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -182,7 +182,6 @@
<ClCompile Include="Src\CommandProcessor.cpp" />
<ClCompile Include="Src\CPMemory.cpp" />
<ClCompile Include="Src\Debugger.cpp" />
<ClCompile Include="Src\DLCache.cpp" />
<ClCompile Include="Src\EmuWindow.cpp" />
<ClCompile Include="Src\Fifo.cpp" />
<ClCompile Include="Src\FPSCounter.cpp" />
@ -204,7 +203,6 @@
<ClCompile Include="Src\Statistics.cpp" />
<ClCompile Include="Src\TextureCacheBase.cpp" />
<ClCompile Include="Src\TextureConversionShader.cpp" />
<ClCompile Include="Src\TextureDecoder.cpp" />
<ClCompile Include="Src\VertexLoader.cpp" />
<ClCompile Include="Src\VertexLoaderManager.cpp" />
<ClCompile Include="Src\VertexLoader_Color.cpp" />
@ -216,6 +214,8 @@
<ClCompile Include="Src\VertexShaderManager.cpp" />
<ClCompile Include="Src\VideoConfig.cpp" />
<ClCompile Include="Src\VideoState.cpp" />
<ClCompile Include="Src\x64DLCache.cpp" />
<ClCompile Include="Src\x64TextureDecoder.cpp" />
<ClCompile Include="Src\XFMemory.cpp" />
<ClCompile Include="Src\XFStructs.cpp" />
</ItemGroup>

View File

@ -5,9 +5,6 @@
<ClCompile Include="Src\memcpy_amd.cpp" />
<ClCompile Include="Src\PixelEngine.cpp" />
<ClCompile Include="Src\VideoConfig.cpp" />
<ClCompile Include="Src\DLCache.cpp">
<Filter>Vertex Loading</Filter>
</ClCompile>
<ClCompile Include="Src\VertexLoader.cpp">
<Filter>Vertex Loading</Filter>
</ClCompile>
@ -92,9 +89,6 @@
<ClCompile Include="Src\OpcodeDecoding.cpp">
<Filter>Decoding</Filter>
</ClCompile>
<ClCompile Include="Src\TextureDecoder.cpp">
<Filter>Decoding</Filter>
</ClCompile>
<ClCompile Include="Src\Debugger.cpp">
<Filter>Base</Filter>
</ClCompile>
@ -122,6 +116,12 @@
<ClCompile Include="Src\FPSCounter.cpp">
<Filter>Util</Filter>
</ClCompile>
<ClCompile Include="Src\x64TextureDecoder.cpp">
<Filter>Decoding</Filter>
</ClCompile>
<ClCompile Include="Src\x64DLCache.cpp">
<Filter>Vertex Loading</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\CommandProcessor.h" />