ARM Support without GLSL

This commit is contained in:
Ryan Houdek
2013-02-26 13:49:00 -06:00
parent 46adbfa9ed
commit 717b976875
133 changed files with 9048 additions and 948 deletions

View File

@ -222,11 +222,11 @@ void RunGpu()
{
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
SaveSSEState();
LoadDefaultSSEState();
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(uData, 32);
OpcodeDecoder_Run(g_bSkipCurrentFrame);
LoadSSEState();
FPURoundMode::LoadSIMDState();
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");

View File

@ -0,0 +1,52 @@
// Copyright (C) 2003-2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// TODO: Handle cache-is-full condition :p
#include "Common.h"
#include "DLCache.h"
// Display-list cache entry points.
// Every function in this namespace is an empty stub: each call returns
// immediately without touching any state.
// NOTE(review): presumably a placeholder so callers still link when the real
// display-list cache is not built -- confirm against the build files.
namespace DLCache
{

// Start-up hook; performs no work.
void Init() {}

// Tear-down hook; performs no work.
void Shutdown() {}

// Cache-flush hook; performs no work.
void Clear() {}

// Incremental-cleanup hook; performs no work.
void ProgressiveCleanup() {}

}  // namespace DLCache
// NOTE - outside the namespace on purpose.
// Stub display-list handler.
//   address - ignored by this implementation.
//   size    - ignored by this implementation.
// Always returns false.
// NOTE(review): false presumably tells the caller the list was not handled by
// a cache and must be processed by the caller itself -- confirm at call sites.
bool HandleDisplayList(u32 address, u32 size)
{
	// Neither argument is inspected; discard them explicitly.
	static_cast<void>(address);
	static_cast<void>(size);
	return false;
}
// Stub: performs no work and has no return value.
void IncrementCheckContextId() {}

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,7 @@
#include "MemoryUtil.h"
#include "StringUtil.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "x64ABI.h"
#include "PixelEngine.h"
#include "Host.h"
@ -43,8 +43,9 @@
//BBox
#include "XFMemory.h"
extern float GC_ALIGNED16(g_fProjectionMatrix[16]);
#ifndef _M_GENERIC
#define USE_JIT
#endif
#define COMPILED_CODE_SIZE 4096
@ -82,8 +83,9 @@ static const float fractionTable[32] = {
1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27),
1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31),
};
#ifdef USE_JIT
using namespace Gen;
#endif
void LOADERDECL PosMtx_ReadDirect_UByte()
{
@ -182,14 +184,19 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
m_VtxDesc = vtx_desc;
SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
#ifdef USE_JIT
AllocCodeSpace(COMPILED_CODE_SIZE);
CompileVertexTranslator();
WriteProtect();
#endif
}
// Destructor: frees the emitter code space (JIT builds only) and then deletes
// the native vertex format object held in m_NativeFmt.
VertexLoader::~VertexLoader()
{
#ifdef USE_JIT
// Code space is only allocated by the constructor when USE_JIT is defined,
// so it is only released under the same guard.
FreeCodeSpace();
#endif
// Deleted unconditionally, whether or not USE_JIT is defined.
delete m_NativeFmt;
}
@ -474,7 +481,8 @@ void VertexLoader::WriteCall(TPipelineFunction func)
m_PipelineStages[m_numPipelineStages++] = func;
#endif
}
// ARMTODO: This should be done in a better way
#ifndef _M_GENERIC
void VertexLoader::WriteGetVariable(int bits, OpArg dest, void *address)
{
#ifdef USE_JIT
@ -498,7 +506,7 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
#endif
#endif
}
#endif
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{
m_numLoadedVertices += count;

View File

@ -76,7 +76,12 @@ private:
}
};
// ARMTODO: This should be done in a better way
#ifndef _M_GENERIC
class VertexLoader : public Gen::XCodeBlock, NonCopyable
#else
class VertexLoader
#endif
{
public:
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
@ -122,8 +127,10 @@ private:
void WriteCall(TPipelineFunction);
#ifndef _M_GENERIC
void WriteGetVariable(int bits, Gen::OpArg dest, void *address);
void WriteSetVariable(int bits, void *address, Gen::OpArg dest);
#endif
};
#endif

View File

@ -35,7 +35,7 @@
#include "VertexLoaderManager.h"
#include "VertexManagerBase.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "x64ABI.h"
#include "DLCache.h"
#include "VideoConfig.h"

View File

@ -1119,20 +1119,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
_mm_storeu_si128( (__m128i*)( dst+(y + iy+1) * width + x + 4 ), o4 );
}
}
#if 0
// Reference C implementation:
for (int y = 0; y < height; y += 8)
for (int x = 0; x < width; x += 8)
for (int iy = 0; iy < 8; iy++, src += 4)
for (int ix = 0; ix < 4; ix++)
{
int val = src[ix];
u8 i1 = Convert4To8(val >> 4);
u8 i2 = Convert4To8(val & 0xF);
memset(dst+(y + iy) * width + x + ix * 2 , i1,4);
memset(dst+(y + iy) * width + x + ix * 2 + 1 , i2,4);
}
#endif
}
break;
case GX_TF_I8: // speed critical
@ -1248,26 +1234,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
}
}
#if 0
// Reference C implementation
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 8)
for (int iy = 0; iy < 4; ++iy, src += 8)
{
u32 * newdst = dst + (y + iy)*width+x;
const u8 * newsrc = src;
u8 srcval;
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
srcval = newsrc[0]; newdst[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
}
#endif
}
break;
case GX_TF_C8:
@ -1380,20 +1346,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
_mm_storeu_si128( (__m128i*)(dst + (y + iy) * width + x), r1 );
}
}
#if 0
// Reference C implementation:
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
for (int iy = 0; iy < 4; iy++, src += 8)
{
u32 *ptr = dst + (y + iy) * width + x;
u16 *s = (u16 *)src;
ptr[0] = decodeIA8Swapped(s[0]);
ptr[1] = decodeIA8Swapped(s[1]);
ptr[2] = decodeIA8Swapped(s[2]);
ptr[3] = decodeIA8Swapped(s[3]);
}
#endif
}
break;
case GX_TF_C14X2:
@ -1493,18 +1445,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
__m128i *ptr = (__m128i *)(dst + (y + iy) * width + x);
_mm_storeu_si128(ptr, abgr888x4);
}
#if 0
// Reference C implementation.
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
for (int iy = 0; iy < 4; iy++, src += 8)
{
u32 *ptr = dst + (y + iy) * width + x;
u16 *s = (u16 *)src;
for(int j = 0; j < 4; j++)
*ptr++ = decode565RGBA(Common::swap16(*s++));
}
#endif
}
break;
case GX_TF_RGB5A3:
@ -1718,13 +1658,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
}
}
}
#if 0
// Reference C implementation:
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
for (int iy = 0; iy < 4; iy++, src += 8)
decodebytesRGB5A3rgba(dst+(y+iy)*width+x, (u16*)src);
#endif
}
break;
case GX_TF_RGBA8: // speed critical
@ -1860,16 +1793,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
_mm_storeu_si128(dst128, rgba11);
}
}
#if 0
// Reference C implementation.
for (int y = 0; y < height; y += 4)
for (int x = 0; x < width; x += 4)
{
for (int iy = 0; iy < 4; iy++)
decodebytesARGB8_4ToRgba(dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16);
src += 64;
}
#endif
}
break;
case GX_TF_CMPR: // speed critical
@ -2104,22 +2027,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
}
}
}
#if 0
for (int y = 0; y < height; y += 8)
{
for (int x = 0; x < width; x += 8)
{
decodeDXTBlockRGBA((u32*)dst + y * width + x, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
decodeDXTBlockRGBA((u32*)dst + y * width + x + 4, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src, width);
src += sizeof(DXTBlock);
}
}
#endif
break;
}
}