ARM Support without GLSL

2025-07-24 14:49:42 -06:00 · 2013-02-26 13:49:00 -06:00
parent 46adbfa9ed
commit 717b976875
133 changed files with 9048 additions and 948 deletions
--- a/Source/Core/VideoCommon/Src/Fifo.cpp
+++ b/Source/Core/VideoCommon/Src/Fifo.cpp
@ -222,11 +222,11 @@ void RunGpu()
 		{
 			u8 *uData = Memory::GetPointer(fifo.CPReadPointer);			
 			
-			SaveSSEState();
-			LoadDefaultSSEState();
+			FPURoundMode::SaveSIMDState();
+			FPURoundMode::LoadDefaultSIMDState();
 			ReadDataFromFifo(uData, 32);				
 			OpcodeDecoder_Run(g_bSkipCurrentFrame);	
-			LoadSSEState();
+			FPURoundMode::LoadSIMDState();

 			//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");

--- a/Source/Core/VideoCommon/Src/GenericDLCache.cpp
+++ b/Source/Core/VideoCommon/Src/GenericDLCache.cpp
@ -0,0 +1,52 @@
+// Copyright (C) 2003-2009 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+// TODO: Handle cache-is-full condition (not applicable while every function in this generic file is an empty stub)
+
+
+#include "Common.h"
+#include "DLCache.h"
+
+namespace DLCache
+{
+// Generic DLCache implementation: every entry point in this namespace is an empty stub.
+void Init()
+{
+}
+
+void Shutdown()
+{
+}
+
+void Clear() 
+{	
+}
+
+void ProgressiveCleanup()
+{
+}
+}  // namespace DLCache
+
+// NOTE: deliberately defined outside namespace DLCache. This generic stub ignores address/size and always returns false.
+bool HandleDisplayList(u32 address, u32 size)
+{
+	return false;
+}
+
+void IncrementCheckContextId()
+{  // Empty stub: the generic DLCache implementation does nothing here.
+}
--- a/Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp
+++ b/Source/Core/VideoCommon/Src/GenericTextureDecoder.cpp
--- a/Source/Core/VideoCommon/Src/VertexLoader.cpp
+++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp
@ -23,7 +23,7 @@
 #include "MemoryUtil.h"
 #include "StringUtil.h"
 #include "x64Emitter.h"
-#include "ABI.h"
+#include "x64ABI.h"
 #include "PixelEngine.h"
 #include "Host.h"

@ -43,8 +43,9 @@
 //BBox
 #include "XFMemory.h"
 extern float GC_ALIGNED16(g_fProjectionMatrix[16]);
-
+#ifndef _M_GENERIC
 #define USE_JIT
+#endif

 #define COMPILED_CODE_SIZE 4096

@ -82,8 +83,9 @@ static const float fractionTable[32] = {
 	1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27),
 	1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31),
 };
-
+#ifdef USE_JIT
 using namespace Gen;
+#endif

 void LOADERDECL PosMtx_ReadDirect_UByte()
 {
@ -182,14 +184,19 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
 	m_VtxDesc = vtx_desc;
 	SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);

+	#ifdef USE_JIT
 	AllocCodeSpace(COMPILED_CODE_SIZE);
 	CompileVertexTranslator();
 	WriteProtect();
+	#endif
+
 }

 VertexLoader::~VertexLoader() 
 {
+	#ifdef USE_JIT  // the constructor only calls AllocCodeSpace() under USE_JIT, so only free it then
 	FreeCodeSpace();
+	#endif  // USE_JIT
 	delete m_NativeFmt;
 }

@ -474,7 +481,8 @@ void VertexLoader::WriteCall(TPipelineFunction func)
 	m_PipelineStages[m_numPipelineStages++] = func;
 #endif
 }
-
+// ARMTODO: Find a cleaner way than #ifndef _M_GENERIC to exclude these OpArg-based emitter helpers from generic builds
+#ifndef _M_GENERIC
 void VertexLoader::WriteGetVariable(int bits, OpArg dest, void *address)
 {
 #ifdef USE_JIT
@ -498,7 +506,7 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
 #endif
 #endif
 }
-
+#endif
 void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 {
 	m_numLoadedVertices += count;
--- a/Source/Core/VideoCommon/Src/VertexLoader.h
+++ b/Source/Core/VideoCommon/Src/VertexLoader.h
@ -76,7 +76,12 @@ private:
 	}
 };

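+// ARMTODO: Find a cleaner way than #ifndef _M_GENERIC to drop the Gen::XCodeBlock base. NOTE(review): the generic variant also loses NonCopyable - confirm that is intended.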
+#ifndef _M_GENERIC
 class VertexLoader : public Gen::XCodeBlock, NonCopyable
+#else
+class VertexLoader
+#endif
 {
 public:
 	VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
@ -122,8 +127,10 @@ private:

 	void WriteCall(TPipelineFunction);

+#ifndef _M_GENERIC
 	void WriteGetVariable(int bits, Gen::OpArg dest, void *address);
 	void WriteSetVariable(int bits, void *address, Gen::OpArg dest);
+#endif
 };									  

 #endif
--- a/Source/Core/VideoCommon/Src/x64DLCache.cpp
+++ b/Source/Core/VideoCommon/Src/x64DLCache.cpp
@ -35,7 +35,7 @@
 #include "VertexLoaderManager.h"
 #include "VertexManagerBase.h"
 #include "x64Emitter.h"
-#include "ABI.h"
+#include "x64ABI.h"

 #include "DLCache.h"
 #include "VideoConfig.h"
--- a/Source/Core/VideoCommon/Src/x64TextureDecoder.cpp
+++ b/Source/Core/VideoCommon/Src/x64TextureDecoder.cpp
@ -1119,20 +1119,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
 							_mm_storeu_si128( (__m128i*)( dst+(y + iy+1) * width + x + 4 ), o4 );
 						}
 			}
-#if 0
-			// Reference C implementation:
-			for (int y = 0; y < height; y += 8)
-				for (int x = 0; x < width; x += 8)
-					for (int iy = 0; iy < 8; iy++, src += 4)
-						for (int ix = 0; ix < 4; ix++)
-						{
-							int val = src[ix];
-							u8 i1 = Convert4To8(val >> 4);
-							u8 i2 = Convert4To8(val & 0xF);
-							memset(dst+(y + iy) * width + x + ix * 2 , i1,4);
-							memset(dst+(y + iy) * width + x + ix * 2 + 1 , i2,4);
-						}
-#endif
 		}
 	   break;
 	case GX_TF_I8:  // speed critical
@ -1248,26 +1234,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
 						
 					}
 			}
-#if 0
-			// Reference C implementation
-			for (int y = 0; y < height; y += 4)
-				for (int x = 0; x < width; x += 8)
-					for (int iy = 0; iy < 4; ++iy, src += 8)
-					{
-						u32 *  newdst = dst + (y + iy)*width+x;
-						const u8 *  newsrc = src;
-						u8 srcval;
-
-						srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-						srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-						srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-						srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-						srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-						srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-						srcval = (newsrc++)[0]; (newdst++)[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-						srcval = newsrc[0]; newdst[0] = srcval | (srcval << 8) | (srcval << 16) | (srcval << 24);
-					}
-#endif
 		}
 		break;
 	case GX_TF_C8:
@ -1380,20 +1346,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
 							_mm_storeu_si128( (__m128i*)(dst + (y + iy) * width + x), r1 );
 						}
 			}
-#if 0
-			// Reference C implementation:
-			for (int y = 0; y < height; y += 4)
-				for (int x = 0; x < width; x += 4)
-					for (int iy = 0; iy < 4; iy++, src += 8)
-					{
-						u32 *ptr = dst + (y + iy) * width + x;
-						u16 *s = (u16 *)src;
-						ptr[0] = decodeIA8Swapped(s[0]);
-						ptr[1] = decodeIA8Swapped(s[1]);
-						ptr[2] = decodeIA8Swapped(s[2]);
-						ptr[3] = decodeIA8Swapped(s[3]);
-					}
-#endif
 		}
 		break;
 	case GX_TF_C14X2:
@ -1493,18 +1445,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
 						__m128i *ptr = (__m128i *)(dst + (y + iy) * width + x);
 						_mm_storeu_si128(ptr, abgr888x4);
 					}
-#if 0
-			// Reference C implementation.
-			for (int y = 0; y < height; y += 4)
-				for (int x = 0; x < width; x += 4)
-					for (int iy = 0; iy < 4; iy++, src += 8)
-					{
-						u32 *ptr = dst + (y + iy) * width + x;
-						u16 *s = (u16 *)src;
-						for(int j = 0; j < 4; j++)
-							*ptr++ = decode565RGBA(Common::swap16(*s++));
-					}
-#endif
 		}
 		break;
 	case GX_TF_RGB5A3:
@ -1718,13 +1658,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
 							}
 						}
 				}
-#if 0
-			// Reference C implementation:
-			for (int y = 0; y < height; y += 4)
-				for (int x = 0; x < width; x += 4)
-					for (int iy = 0; iy < 4; iy++, src += 8)
-						decodebytesRGB5A3rgba(dst+(y+iy)*width+x, (u16*)src);
-#endif
 		}
 		break;
 	case GX_TF_RGBA8:  // speed critical
@ -1860,16 +1793,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
 						_mm_storeu_si128(dst128, rgba11);
 					}				
 			}
-#if 0
-			// Reference C implementation.
-			for (int y = 0; y < height; y += 4)
-				for (int x = 0; x < width; x += 4)
-				{
-					for (int iy = 0; iy < 4; iy++)
-						decodebytesARGB8_4ToRgba(dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16);
-					src += 64;
-				}
-#endif
 		}
 		break;
 	case GX_TF_CMPR:  // speed critical
@ -2104,22 +2027,6 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
 					}					
 				}
 			}
-#if 0
-			for (int y = 0; y < height; y += 8)
-			{
-				for (int x = 0; x < width; x += 8)
-				{
-					decodeDXTBlockRGBA((u32*)dst + y * width + x, (DXTBlock*)src, width);
-										src += sizeof(DXTBlock);
-					decodeDXTBlockRGBA((u32*)dst + y * width + x + 4, (DXTBlock*)src, width);
-										src += sizeof(DXTBlock);
-					decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x, (DXTBlock*)src, width);
-										src += sizeof(DXTBlock);
-					decodeDXTBlockRGBA((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src, width);
-										src += sizeof(DXTBlock);
-				}
-			}
-#endif
 			break;
 		}
 	}