ARM Support without GLSL

2025-07-24 14:49:42 -06:00 · 2013-02-26 13:49:00 -06:00
parent 46adbfa9ed
commit 717b976875
133 changed files with 9048 additions and 948 deletions
--- a/Source/Core/Common/Src/ArmCPUDetect.cpp
+++ b/Source/Core/Common/Src/ArmCPUDetect.cpp
@ -0,0 +1,160 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#include "Common.h"
+#include "CPUDetect.h"
+#include "StringUtil.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Path of the file that the helper functions in this file open and scan line by line.
+const char procfile[] = "/proc/cpuinfo";
+
+// Returns a heap-allocated copy of the value of the "Hardware" line of
+// procfile, without its line terminator.
+// Returns 0 when the file cannot be opened or contains no such line.
+// The result comes from strndup(); the caller owns it and must free() it.
+char *GetCPUString()
+{
+	const char marker[] = "Hardware\t: ";
+	char *cpu_string = 0;
+	char buf[1024];
+	FILE *fp;
+
+	fp = fopen(procfile, "r");
+	if (!fp)
+		return 0;
+
+	while (fgets(buf, sizeof(buf), fp))
+	{
+		// strncmp() returns non-zero when the line does not start with the marker
+		if (strncmp(buf, marker, sizeof(marker) - 1))
+			continue;
+		const char *value = buf + sizeof(marker) - 1;
+		// Copy up to (not including) the line terminator; a line without one
+		// (e.g. truncated by the buffer size) is copied whole.
+		cpu_string = strndup(value, strcspn(value, "\r\n"));
+		break;
+	}
+	// Always release the stream, whether or not the marker was found
+	fclose(fp);
+	return cpu_string;
+}
+// Returns true when `feature` is listed as a whole word on a "Features" line
+// of procfile. Returns false when it is not listed or the file cannot be opened.
+bool CheckCPUFeature(const char *feature)
+{
+	const char marker[] = "Features\t: ";
+	// Splitting on the line terminator too keeps the last entry free of '\n'
+	const char delimiters[] = " \t\r\n";
+	char buf[1024];
+	bool found = false;
+	FILE *fp;
+
+	fp = fopen(procfile, "r");
+	if (!fp)
+		return false;
+
+	while (!found && fgets(buf, sizeof(buf), fp))
+	{
+		if (strncmp(buf, marker, sizeof(marker) - 1))
+			continue;
+		char *featurestring = buf + sizeof(marker) - 1;
+		char *token = strtok(featurestring, delimiters);
+		while (token != NULL)
+		{
+			// Whole-token comparison: a substring search would report "fp"
+			// for "vfp", "vfp" for "vfpv3" and "thumb" for "thumbee".
+			if (!strcmp(token, feature))
+			{
+				found = true;
+				break;
+			}
+			token = strtok(NULL, delimiters);
+		}
+	}
+	// Close the stream on every path, including the early "found" exit
+	fclose(fp);
+	return found;
+}
+// Returns the number of lines in procfile that start with "processor\t: ".
+// Returns 0 when the file cannot be opened.
+int GetCoreCount()
+{
+	const char marker[] = "processor\t: ";
+	int cores = 0;
+	char buf[1024];
+	FILE *fp;
+
+	fp = fopen(procfile, "r");
+	if (!fp)
+		return 0;
+
+	while (fgets(buf, sizeof(buf), fp))
+	{
+		// Every line that begins with the marker counts as one core
+		if (strncmp(buf, marker, sizeof(marker) - 1))
+			continue;
+		++cores;
+	}
+	// Release the stream once the whole file has been scanned
+	fclose(fp);
+	return cores;
+}
+
+// Global CPU description object; constructing it runs Detect() (see the constructor below).
+CPUInfo cpu_info;
+
+// Fills in every field by running the detection routine right away.
+CPUInfo::CPUInfo() {
+	Detect();
+}
+
+// Detects the various cpu features.
+// The name, core count and feature flags are read from /proc/cpuinfo through
+// the helpers above; bArmV7 is decided at compile time from __ARM_ARCH_7A__.
+void CPUInfo::Detect()
+{
+	// Set some defaults here
+	// When ARMv8 cpus come out, these need to be updated.
+	HTT = false;
+	OS64bit = false;
+	CPU64bit = false;
+	Mode64bit = false;
+	vendor = VENDOR_ARM;
+
+	// Get the information about the CPU
+	// GetCPUString() returns 0 when no "Hardware" line could be read, so fall
+	// back to a placeholder instead of handing a null pointer to strncpy().
+	char *hardware = GetCPUString();
+	strncpy(cpu_string, hardware ? hardware : "Unknown", sizeof(cpu_string) - 1);
+	// strncpy() does not terminate the destination when the source is too long
+	cpu_string[sizeof(cpu_string) - 1] = '\0';
+	// The string was allocated by strndup() inside GetCPUString()
+	free(hardware);
+
+	num_cores = GetCoreCount();
+	bSwp = CheckCPUFeature("swp");
+	bHalf = CheckCPUFeature("half");
+	bThumb = CheckCPUFeature("thumb");
+	bFastMult = CheckCPUFeature("fastmult");
+	bVFP = CheckCPUFeature("vfp");
+	bEDSP = CheckCPUFeature("edsp");
+	bThumbEE = CheckCPUFeature("thumbee");
+	bNEON = CheckCPUFeature("neon");
+	bVFPv3 = CheckCPUFeature("vfpv3");
+	bTLS = CheckCPUFeature("tls");
+	bVFPv4 = CheckCPUFeature("vfpv4");
+	bIDIVa = CheckCPUFeature("idiva");
+	bIDIVt = CheckCPUFeature("idivt");
+	// These two are ARMv8 specific.
+	bFP = CheckCPUFeature("fp");
+	bASIMD = CheckCPUFeature("asimd");
+
+
+#if defined(__ARM_ARCH_7A__)
+	bArmV7 = true;
+#else
+	bArmV7 = false;
+#endif
+}
+
+// Builds the display string for the detected CPU: the hardware name, the
+// core count, then one ", NAME" suffix per feature flag that is set.
+std::string CPUInfo::Summarize()
+{
+	// Singular wording for exactly one core, plural for every other count
+	const char *layout = (num_cores == 1) ? "%s, %i core" : "%s, %i cores";
+	std::string summary = StringFromFormat(layout, cpu_string, num_cores);
+
+	// Suffixes are appended in the same order the flags are detected
+	if (bSwp)
+		summary += ", SWP";
+	if (bHalf)
+		summary += ", Half";
+	if (bThumb)
+		summary += ", Thumb";
+	if (bFastMult)
+		summary += ", FastMult";
+	if (bVFP)
+		summary += ", VFP";
+	if (bEDSP)
+		summary += ", EDSP";
+	if (bThumbEE)
+		summary += ", ThumbEE";
+	if (bNEON)
+		summary += ", NEON";
+	if (bVFPv3)
+		summary += ", VFPv3";
+	if (bTLS)
+		summary += ", TLS";
+	if (bVFPv4)
+		summary += ", VFPv4";
+	if (bIDIVa)
+		summary += ", IDIVa";
+	if (bIDIVt)
+		summary += ", IDIVt";
+
+	return summary;
+}
--- a/Source/Core/Common/Src/ArmEmitter.cpp
+++ b/Source/Core/Common/Src/ArmEmitter.cpp
@ -0,0 +1,967 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#include "Common.h"
+#include "ArmEmitter.h"
+#include "CPUDetect.h"
+
+#include <assert.h>
+#include <stdarg.h>
+
+// For cache flushing on Symbian/Blackberry
+#ifdef __SYMBIAN32__
+#include <e32std.h>
+#endif
+
+#ifdef BLACKBERRY
+#include <sys/mman.h>
+#endif
+
+namespace ArmGen
+{
+
+// Rotates the 32-bit value `a` right by `amount` bits.
+inline u32 RotR(u32 a, int amount) {
+	// A zero rotation is answered directly so the (32 - amount) shift never becomes a shift by 32
+	return amount ? ((a >> amount) | (a << (32 - amount))) : a;
+}
+
+// Rotates the 32-bit value `a` left by `amount` bits.
+inline u32 RotL(u32 a, int amount) {
+	// A zero rotation is answered directly so the (32 - amount) shift never becomes a shift by 32
+	return amount ? ((a << amount) | (a >> (32 - amount))) : a;
+}
+
+// Tries to express `imm` as an 8-bit value rotated right by an even amount.
+// On success the encoding is stored in `op2` and true is returned; otherwise
+// `op2` is left untouched and false is returned.
+bool TryMakeOperand2(u32 imm, Operand2 &op2) {
+	// Try every even rotation (0, 2, ..., 30) in ascending order and take the first that fits.
+	for (int rotation = 0; rotation < 32; rotation += 2) {
+		int window = RotR(0xFF, rotation);
+		if ((imm & window) != imm)
+			continue; // some set bit of imm falls outside this 8-bit window
+		// Rotating left undoes the window's right rotation, leaving the 8-bit payload
+		op2 = Operand2((u8)(RotL(imm, rotation)), (u8)(rotation / 2));
+		return true;
+	}
+	return false;
+}
+
+// Like TryMakeOperand2, but falls back to encoding the bitwise complement of `imm`.
+// *inverse is set to false when `imm` itself was encodable and to true when the
+// complement was attempted; the return value tells whether `op2` is valid.
+bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse)
+{
+	if (TryMakeOperand2(imm, op2)) {
+		*inverse = false;
+		return true;
+	}
+	// Direct encoding failed: flag the inversion, then try ~imm
+	*inverse = true;
+	return TryMakeOperand2(~imm, op2);
+}
+
+// Like TryMakeOperand2, but falls back to encoding the arithmetic negation of `imm`.
+// *negated is set to false when `imm` itself was encodable and to true when the
+// negated value was attempted; the return value tells whether `op2` is valid.
+bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated)
+{
+	if (TryMakeOperand2(imm, op2)) {
+		*negated = false;
+		return true;
+	}
+	// Direct encoding failed: flag the negation, then try -imm
+	*negated = true;
+	return TryMakeOperand2(-imm, op2);
+}
+
+// Emits code that loads the 32-bit constant `val` into `reg`.
+// With optimize == false on ARMv7 a MOVW/MOVT pair is always emitted;
+// otherwise the shortest form this function knows of is chosen.
+void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize)
+{
+	Operand2 op2;
+	bool inverse;
+	if (!optimize)
+	{
+		// Only used in backpatch atm
+		// Only support ARMv7 right now
+		if (cpu_info.bArmV7) {
+			MOVW(reg, val & 0xFFFF);
+			MOVT(reg, val, true);
+		}
+		else
+		{
+			// ARMv6 version won't use backpatch for now
+			// Run again with optimizations
+			// NOTE(review): relies on the declaration's default for `optimize` selecting the optimized path - confirm in the header
+			MOVI2R(reg, val);
+		}
+	} else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) {
+		// The value (or its complement) fits a rotated 8-bit immediate: one MOV or MVN
+		if (!inverse)
+			MOV(reg, op2);
+		else
+			MVN(reg, op2);
+	} else {
+		if (cpu_info.bArmV7) {
+			// ARMv7 - can use MOVT/MOVW, best choice
+			MOVW(reg, val & 0xFFFF);
+			// MOVT is skipped when the upper half is zero
+			if(val & 0xFFFF0000)
+				MOVT(reg, val, true);
+		} else {
+			// ARMv6 - fallback sequence.
+			// TODO: Optimize further. Can for example choose negation etc.
+			// Literal pools is another way to do this but much more complicated
+			// so I can't really be bothered for an outdated CPU architecture like ARMv6.
+			// Build the value one byte at a time, lowest byte first: MOV for the
+			// first non-zero byte, ORR for each later one.
+			bool first = true;
+			// (shift & 0xF) yields the rotation fields 0, 12, 8, 4 for bytes 0..3
+			int shift = 16;
+			for (int i = 0; i < 4; i++) {
+				if (val & 0xFF) {
+					if (first) {
+						MOV(reg, Operand2((u8)val, (u8)(shift & 0xF)));
+						first = false;
+					} else {
+						ORR(reg, reg, Operand2((u8)val, (u8)(shift & 0xF)));
+					}
+				}
+				shift -= 4;
+				val >>= 8;
+			}
+		}
+	}
+}
+// Moves IMM to memory location.
+// `op` supplies the address and `val` the value; R12 and R14 are overwritten.
+void ARMXEmitter::ARMABI_MOVI2M(Operand2 op, Operand2 val)
+{
+	// Load the value into R14 and the address into R12, each with a MOVW/MOVT pair
+	MOVW(R14, val); MOVT(R14, val, true);
+	MOVW(R12, op); MOVT(R12, op, true);
+	STR(R12, R14); // R14 is what we want to store, R12 holds the address
+}
+// Emits a call to `func`: its address is loaded into `reg` (which is overwritten)
+// and a register-form BL is issued through it.
+void ARMXEmitter::QuickCallFunction(ARMReg reg, void *func) {
+	MOVI2R(reg, (u32)(func));
+	BL(reg);
+}
+
+// Points the emitter at `ptr` and remembers it as the start of the code region.
+void ARMXEmitter::SetCodePtr(u8 *ptr)
+{
+	code = ptr;
+	startcode = code;
+}
+
+// Returns the current emit position as a read-only pointer.
+const u8 *ARMXEmitter::GetCodePtr() const
+{
+	return code;
+}
+
+// Returns the current emit position as a writable pointer.
+u8 *ARMXEmitter::GetWritableCodePtr()
+{
+	return code;
+}
+
+// Fills the next bytes/4 instruction slots with breakpoint instructions.
+void ARMXEmitter::ReserveCodeSpace(u32 bytes)
+{
+	// Only whole 32-bit words are written; a remainder of bytes % 4 is not emitted
+	u32 remaining_words = bytes / 4;
+	while (remaining_words--)
+		Write32(0xE1200070); //bkpt 0
+}
+
+// Pads with breakpoints up to the next 16-byte boundary and returns the new position.
+const u8 *ARMXEmitter::AlignCode16()
+{
+	// (-address) & 15 is the byte distance to the next multiple of 16 (0 if already aligned)
+	ReserveCodeSpace((-(s32)code) & 15);
+	return code;
+}
+
+// Pads with breakpoints up to the next 4096-byte boundary and returns the new position.
+const u8 *ARMXEmitter::AlignCodePage()
+{
+	// (-address) & 4095 is the byte distance to the next multiple of 4096 (0 if already aligned)
+	ReserveCodeSpace((-(s32)code) & 4095);
+	return code;
+}
+
+// Flushes everything emitted since the previous FlushIcache() call and
+// records the current position as the new flush boundary.
+void ARMXEmitter::FlushIcache()
+{
+	FlushIcacheSection(lastCacheFlushEnd, code);
+	lastCacheFlushEnd = code;
+}
+
+// Makes the range [start, end) visible to instruction fetch using the
+// mechanism of the target platform. Does nothing on _WIN32 builds.
+void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end)
+{
+#ifdef __SYMBIAN32__
+	User::IMB_Range( start, end);
+#elif defined(BLACKBERRY)
+	msync(start, end - start, MS_SYNC | MS_INVALIDATE_ICACHE);
+#else
+#ifndef _WIN32
+#ifdef ANDROID
+	__builtin___clear_cache (start, end);
+#else
+	// If on Linux, we HAVE to clear from start addr or else everything gets /really/ unstable
+	// Note: `start` is ignored here; the flush begins at startcode (set by SetCodePtr)
+	__builtin___clear_cache (startcode, end); 
+#endif
+#endif
+#endif
+}
+
+// Selects the condition code applied to subsequently emitted conditional
+// instructions; it is stored pre-shifted into bits 31:28.
+void ARMXEmitter::SetCC(CCFlags cond)
+{
+	condition = cond << 28;
+}
+
+// Emits `count` no-op instructions under the current condition.
+void ARMXEmitter::NOP(int count)
+{
+	for (int i = 0; i < count; i++) {
+		// 0x01A00000 with all register fields zero is MOV R0, R0
+		Write32(condition | 0x01A00000);
+	}
+}
+
+// Emits SETEND; BE == true sets bit 9 of the instruction word.
+void ARMXEmitter::SETEND(bool BE)
+{
+	//SETEND is non-conditional
+	Write32( 0xF1010000 | (BE << 9));
+}
+// Emits BKPT with the 16-bit immediate `arg`.
+void ARMXEmitter::BKPT(u16 arg)
+{
+	// The upper 12 bits of arg go to bits 19:8, the low 4 bits to bits 3:0
+	Write32(condition | 0x01200070 | (arg << 4 & 0x000FFF00) | (arg & 0x0000000F));
+}
+// Emits the YIELD hint under the current condition.
+void ARMXEmitter::YIELD()
+{
+	Write32(condition | 0x0320F001);
+}
+
+// Reserves one instruction slot for a forward B whose target is not known yet.
+// Pass the returned record to SetJumpTarget() once the target has been reached.
+FixupBranch ARMXEmitter::B()
+{
+	FixupBranch branch;
+	branch.type = 0; // Zero for B
+	branch.ptr = code;
+	branch.condition = condition;
+	//We'll write NOP here for now.
+	Write32(condition | 0x01A00000);
+	return branch;
+}
+// Same as B(), but the slot is later patched into a BL.
+FixupBranch ARMXEmitter::BL()
+{
+	FixupBranch branch;
+	branch.type = 1; // One for BL
+	branch.ptr = code;
+	branch.condition = condition;
+	//We'll write NOP here for now.
+	Write32(condition | 0x01A00000);
+	return branch;
+}
+
+// Same as B(), but the patched branch uses `Cond` instead of the current condition.
+FixupBranch ARMXEmitter::B_CC(CCFlags Cond)
+{
+	FixupBranch branch;
+	branch.type = 0; // Zero for B
+	branch.ptr = code;
+	branch.condition = Cond << 28;
+	//We'll write NOP here for now.
+	// The placeholder itself still carries the emitter's current condition
+	Write32(condition | 0x01A00000);
+	return branch;
+}
+// Emits a B with condition `Cond` directly to `fnptr`.
+// The target must be reachable with the instruction's signed 24-bit word offset.
+void ARMXEmitter::B_CC(CCFlags Cond, const void *fnptr)
+{
+	// Branch offsets are relative to the address of the instruction plus 8
+	s32 distance = (s32)fnptr - (s32(code) + 8);
+	// A signed 24-bit word offset covers the byte range [-33554432, 33554428].
+	// +33554432 would wrap into a backwards branch once masked, so reject it.
+	_assert_msg_(DYNA_REC, distance >= -33554432
+	             && distance < 33554432,
+	             "B_CC out of range (%p calls %p)", code, fnptr);
+
+	Write32((Cond << 28) | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));
+}
+// Reserves one instruction slot for a forward BL with condition `Cond`.
+// Pass the returned record to SetJumpTarget() once the target has been reached.
+FixupBranch ARMXEmitter::BL_CC(CCFlags Cond)
+{
+	FixupBranch branch;
+	branch.type = 1; // One for BL
+	branch.ptr = code;
+	branch.condition = Cond << 28;
+	//We'll write NOP here for now.
+	// The placeholder itself still carries the emitter's current condition
+	Write32(condition | 0x01A00000);
+	return branch;
+}
+// Patches the placeholder recorded in `branch` so that it jumps to the
+// current emit position. branch.type selects B (0) or BL (anything else).
+void ARMXEmitter::SetJumpTarget(FixupBranch const &branch)
+{
+	// Offset from (placeholder address + 8) to the current position
+	s32 distance =  (s32(code) - 8)  - (s32)branch.ptr;
+	// A signed 24-bit word offset covers the byte range [-33554432, 33554428].
+	// +33554432 would wrap into a backwards branch once masked, so reject it.
+	_assert_msg_(DYNA_REC, distance >= -33554432
+	             && distance < 33554432,
+	             "SetJumpTarget out of range (%p calls %p)", code,
+	             branch.ptr);
+	if(branch.type == 0) // B
+		*(u32*)branch.ptr = (u32)(branch.condition | (10 << 24) | ((distance >> 2) &
+		0x00FFFFFF));
+	else // BL
+		*(u32*)branch.ptr =	(u32)(branch.condition | 0x0B000000 | ((distance >> 2)
+		& 0x00FFFFFF));
+}
+// Emits a B under the current condition directly to `fnptr`.
+// The target must be reachable with the instruction's signed 24-bit word offset.
+void ARMXEmitter::B (const void *fnptr)
+{
+	// Branch offsets are relative to the address of the instruction plus 8
+	s32 distance = (s32)fnptr - (s32(code) + 8);
+	// A signed 24-bit word offset covers the byte range [-33554432, 33554428].
+	// +33554432 would wrap into a backwards branch once masked, so reject it.
+	_assert_msg_(DYNA_REC, distance >= -33554432
+	             && distance < 33554432,
+	             "B out of range (%p calls %p)", code, fnptr);
+
+	Write32(condition | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));
+}
+
+// Emits a branch to the address held in `src` (0x012FFF10 is the BX encoding).
+void ARMXEmitter::B(ARMReg src)
+{
+	Write32(condition | 0x12FFF10 | src);
+}
+
+// Emits a BL under the current condition directly to `fnptr`.
+// The target must be reachable with the instruction's signed 24-bit word offset.
+void ARMXEmitter::BL(const void *fnptr)
+{
+	// Branch offsets are relative to the address of the instruction plus 8
+	s32 distance = (s32)fnptr - (s32(code) + 8);
+	// A signed 24-bit word offset covers the byte range [-33554432, 33554428].
+	// +33554432 would wrap into a backwards branch once masked, so reject it.
+	_assert_msg_(DYNA_REC, distance >= -33554432
+	             && distance < 33554432,
+	             "BL out of range (%p calls %p)", code, fnptr);
+	Write32(condition | 0x0B000000 | ((distance >> 2) & 0x00FFFFFF));
+}
+// Emits a call to the address held in `src` (0x012FFF30 is the BLX register encoding).
+void ARMXEmitter::BL(ARMReg src)
+{
+	Write32(condition | 0x12FFF30 | src);
+}
+// Emits a PUSH of `num` registers; the registers follow as variadic arguments.
+void ARMXEmitter::PUSH(const int num, ...)
+{
+	// One bit per register number
+	u16 mask = 0;
+	va_list args;
+	va_start(args, num);
+	for (int n = 0; n < num; ++n)
+	{
+		const u8 reg = va_arg(args, u32);
+		mask |= (1 << reg);
+	}
+	va_end(args);
+	// 0x092D == 2349: the opcode half of the instruction word
+	Write32(condition | (0x092D << 16) | mask);
+}
+// Emits a POP of `num` registers; the registers follow as variadic arguments.
+void ARMXEmitter::POP(const int num, ...)
+{
+	// One bit per register number
+	u16 mask = 0;
+	va_list args;
+	va_start(args, num);
+	for (int n = 0; n < num; ++n)
+	{
+		const u8 reg = va_arg(args, u32);
+		mask |= (1 << reg);
+	}
+	va_end(args);
+	// 0x08BD == 2237: the opcode half of the instruction word
+	Write32(condition | (0x08BD << 16) | mask);
+}
+
+// Emits a MOV-class (opcode 13) instruction whose source register is shifted
+// by the immediate amount taken from op2.Imm5(); `op` goes into bits 4 and up.
+void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2)
+{
+	Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | op2.Imm5() | (op << 4) | src);
+}
+// Register-shift form: the shift amount comes from register `op2` (bits 11:8).
+void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2)
+{
+	Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | (op2 << 8) | (op << 4) | src);
+}
+
+// Opcode lookup table used by WriteInstruction: the row is the operation
+// index, the column is the operand type returned by Operand2::GetType().
+// Column order:
+// IMM, REG, IMMSREG, RSR 
+// -1 for invalid if the instruction doesn't support that
+// The selected value is shifted into bits 21 and up of the instruction word.
+const s32 InstOps[][4] = {{16, 0, 0, 0}, // AND(s)
+						  {17, 1, 1, 1}, // EOR(s)
+						  {18, 2, 2, 2}, // SUB(s)
+						  {19, 3, 3, 3}, // RSB(s)
+						  {20, 4, 4, 4}, // ADD(s)
+						  {21, 5, 5, 5}, // ADC(s)
+						  {22, 6, 6, 6}, // SBC(s)
+						  {23, 7, 7, 7}, // RSC(s)
+						  {24, 8, 8, 8}, // TST
+						  {25, 9, 9, 9}, // TEQ
+						  {26, 10, 10, 10}, // CMP
+						  {27, 11, 11, 11}, // CMN
+						  {28, 12, 12, 12}, // ORR(s)
+						  {29, 13, 13, 13}, // MOV(s)
+						  {30, 14, 14, 14}, // BIC(s)
+						  {31, 15, 15, 15}, // MVN(s)
+						  {24, -1, -1, -1}, // MOVW
+						  {26, -1, -1, -1}, // MOVT
+						 }; 
+
+// Mnemonics for diagnostics, indexed by the same operation index as InstOps.
+// There must be one entry per InstOps row: WriteInstruction reads
+// InstNames[Op] for every operation, including MOVW (16) and MOVT (17).
+const char *InstNames[] = { "AND",
+							"EOR",
+							"SUB",
+							"RSB",
+							"ADD",
+							"ADC",
+							"SBC",
+							"RSC",
+							"TST",
+							"TEQ",
+							"CMP",
+							"CMN",
+							"ORR",
+							"MOV",
+							"BIC",
+							"MVN",
+							"MOVW",
+							"MOVT"
+						  };
+
+// Data-processing instruction wrappers. Each forwards to WriteInstruction with
+// its operation index (the row in InstOps); the ...S variants and the compare
+// instructions pass SetFlags = true. Instructions without a destination or
+// first source register pass R0 for the unused field.
+void ARMXEmitter::AND (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm); }
+void ARMXEmitter::ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm, true); }
+void ARMXEmitter::EOR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm); }
+void ARMXEmitter::EORS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm, true); }
+void ARMXEmitter::SUB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm); }
+void ARMXEmitter::SUBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm, true); }
+void ARMXEmitter::RSB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm); }
+void ARMXEmitter::RSBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm, true); }
+void ARMXEmitter::ADD (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm); }
+void ARMXEmitter::ADDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm, true); }
+void ARMXEmitter::ADC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm); }
+void ARMXEmitter::ADCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm, true); }
+void ARMXEmitter::SBC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm); }
+void ARMXEmitter::SBCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm, true); }
+void ARMXEmitter::RSC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm); }
+void ARMXEmitter::RSCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm, true); }
+void ARMXEmitter::TST (			  ARMReg Rn, Operand2 Rm) { WriteInstruction(8, R0, Rn, Rm, true); }
+void ARMXEmitter::TEQ (			  ARMReg Rn, Operand2 Rm) { WriteInstruction(9, R0, Rn, Rm, true); }
+void ARMXEmitter::CMP (			  ARMReg Rn, Operand2 Rm) { WriteInstruction(10, R0, Rn, Rm, true); }
+void ARMXEmitter::CMN (			  ARMReg Rn, Operand2 Rm) { WriteInstruction(11, R0, Rn, Rm, true); }
+void ARMXEmitter::ORR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm); }
+void ARMXEmitter::ORRS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm, true); }
+void ARMXEmitter::MOV (ARMReg Rd,			 Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm); }
+void ARMXEmitter::MOVS(ARMReg Rd,			 Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm, true); }
+void ARMXEmitter::BIC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm); }
+void ARMXEmitter::BICS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm, true); }
+void ARMXEmitter::MVN (ARMReg Rd,			 Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm); }
+void ARMXEmitter::MVNS(ARMReg Rd,			 Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm, true); }
+void ARMXEmitter::MOVW(ARMReg Rd,			 Operand2 Rm) { WriteInstruction(16, Rd, R0, Rm); }
+// With TopBits set, the upper 16 bits of Rm.Value are used as the immediate
+void ARMXEmitter::MOVT(ARMReg Rd, Operand2 Rm, bool TopBits) { WriteInstruction(17, Rd, R0, TopBits ? Rm.Value >> 16 : Rm); }
+
+// Encodes and emits one data-processing instruction.
+//   Op       - operation index (row of InstOps / InstNames)
+//   Rd, Rn   - destination and first source register fields
+//   Rm       - second operand; its type selects the InstOps column
+//   SetFlags - sets bit 20 of the instruction word
+void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags) // This can get renamed later
+{
+	s32 op = InstOps[Op][Rm.GetType()]; // Type always decided by last operand
+	u32 Data = Rm.GetData();
+	if (Rm.GetType() == TYPE_IMM)
+	{
+		switch (Op)
+		{
+			// MOV cases that support IMM16
+			case 16:
+			case 17:
+				Data = Rm.Imm16();
+			break;
+			default:
+			break;
+		}
+	}
+	// -1 in InstOps marks an operand type this operation cannot take
+	if (op == -1)
+		_assert_msg_(DYNA_REC, false, "%s not yet support %d", InstNames[Op], Rm.GetType()); 
+	Write32(condition | (op << 21) | (SetFlags ? (1 << 20) : 0) | Rn << 16 | Rd << 12 | Data);
+}
+
+// Data Operations
+// Shared encoder for the instruction group with bits 27:24 = 0x7 and bit 4 set.
+// Field placement: Op -> 22:20, dest -> 19:16, Op2 -> 15:12, r1 -> 11:8, Op3 -> 7:5, r2 -> 3:0.
+void ARMXEmitter::WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2)
+{
+	Write32(condition | (0x7 << 24) | (Op << 20) | (dest << 16) | (Op2 << 12) | (r1 << 8) | (Op3 << 5) | (1 << 4) | r2);
+}
+// Emits UDIV dest = dividend / divisor. Raises a PanicAlert when cpu_info.bIDIVa is false.
+void ARMXEmitter::UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)
+{
+	if (!cpu_info.bIDIVa)
+		PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer.");
+	// The divisor goes in bits 11:8 and the dividend in bits 3:0
+	WriteSignedMultiply(3, 0xF, 0, dest, divisor, dividend);
+}
+// Emits SDIV dest = dividend / divisor. Raises a PanicAlert when cpu_info.bIDIVa is false.
+void ARMXEmitter::SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)
+{
+	if (!cpu_info.bIDIVa)
+		PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer.");
+	// The divisor goes in bits 11:8 and the dividend in bits 3:0
+	WriteSignedMultiply(1, 0xF, 0, dest, divisor, dividend);
+}
+// Logical shift left. The Operand2 overloads shift by an immediate (op 0), the
+// ARMReg overloads by a register (op 1); the ...S variants also set the flags.
+void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, false, dest, src, op2);}
+void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);}
+void ARMXEmitter::LSL (ARMReg dest, ARMReg src, ARMReg op2)	  { WriteShiftedDataOp(1, false, dest, src, op2);} 
+void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, ARMReg op2)	  { WriteShiftedDataOp(1, true, dest, src, op2);}
+// Emits MUL dest = src * op2.
+void ARMXEmitter::MUL (ARMReg dest,	ARMReg src, ARMReg op2)
+{
+	Write32(condition | (dest << 16) | (src << 8) | (9 << 4) | op2);
+}
+// Emits MULS: same as MUL with bit 20 (set flags) set.
+void ARMXEmitter::MULS(ARMReg dest,	ARMReg src, ARMReg op2)
+{
+	Write32(condition | (1 << 20) | (dest << 16) | (src << 8) | (9 << 4) | op2);
+}
+
+// Shared encoder for the long multiplies: op -> bits 20 and up, destHi -> 19:16,
+// destLo -> 15:12, rm -> 11:8, rn -> 3:0.
+void ARMXEmitter::Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) {
+	Write32(condition | (op << 20) | (destHi << 16) | (destLo << 12) | (rm << 8) | (9 << 4) | rn);
+}
+
+// Emits UMULL (op 0x8); the two source registers are passed to the encoder swapped.
+void ARMXEmitter::UMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
+{
+	Write4OpMultiply(0x8, destLo, destHi, rn, rm);
+}
+
+// Emits SMULL (op 0xC); the two source registers are passed to the encoder swapped.
+void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
+{
+	Write4OpMultiply(0xC, destLo, destHi, rn, rm);
+}
+// Emits SXTB dest, op2 (no rotation).
+void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2)
+{
+	Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2);
+}
+// Emits SXTH: encoded as SXTAH with register 15 in the accumulate field.
+void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation)
+{
+	SXTAH(dest, (ARMReg)15, op2, rotation);
+}
+// Emits SXTAH dest, src, op2 with the given rotation field.
+void ARMXEmitter::SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation) 
+{
+	// bits ten and 11 are the rotation amount, see 8.8.232 for more
+	// information
+	Write32(condition | (0x6B << 20) | (src << 16) | (dest << 12) | (rotation << 10) | (7 << 4) | op2);
+}
+// Emits REV dest, src (107 == 0x6B, 243 == 0xF3).
+void ARMXEmitter::REV (ARMReg dest, ARMReg src				) 
+{
+	Write32(condition | (107 << 20) | (15 << 16) | (dest << 12) | (243 << 4) | src);
+}
+// Emits REV16 dest, src (byte-reverse each halfword).
+// Encoding: cond 0110 1011 1111 Rd 1111 1011 Rm, i.e. the same word as REV
+// above except for bits 7:4 (0xB here, 0x3 for REV).
+void ARMXEmitter::REV16(ARMReg dest, ARMReg src)
+{
+	Write32(condition | (0x6BF << 16) | (dest << 12) | (0xFB << 4) | src);
+}
+
+// Emits MSR with an immediate operand; write_nzcvq and write_g set mask bits 19 and 18.
+void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g,		Operand2 op2)
+{
+	Write32(condition | (0x320F << 12) | (write_nzcvq << 19) | (write_g << 18) | op2.Imm12Mod());
+}
+// Emits MSR with a register operand; write_nzcvq and write_g set mask bits 19 and 18.
+void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g,		ARMReg src)
+{
+	Write32(condition | (0x120F << 12) | (write_nzcvq << 19) | (write_g << 18) | src);
+}
+// Emits MRS into `dest`.
+void ARMXEmitter::MRS (ARMReg dest)
+{
+	Write32(condition | (16 << 20) | (15 << 16) | (dest << 12));
+}
+// Shared encoder for the immediate-offset loads and stores.
+// `dest` is placed in bits 19:16 and `src` in bits 15:12; callers pick the
+// argument order (compare STR and LDR).
+void ARMXEmitter::WriteStoreOp(u32 op, ARMReg dest, ARMReg src, Operand2 op2)
+{
+	// Note: both branches OR in the same bits, since (3 << 23) == 0x01800000
+	if (op2.GetData() == 0) // Don't index
+		Write32(condition | 0x01800000 | (op << 20) | (dest << 16) | (src << 12) | op2.Imm12());
+	else
+		Write32(condition | (op << 20) | (3 << 23) | (dest << 16) | (src << 12) | op2.Imm12()); 
+}
+// Store word / store byte of register `src`; `dest` goes in the bits 19:16 register field.
+void ARMXEmitter::STR (ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x40, dest, src, op);}
+void ARMXEmitter::STRB(ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x44, dest, src, op);}
+// Register-offset store; Index and Add set bits 24 and 23 of the instruction word.
+// `dest` goes in bits 19:16, `base` in bits 15:12, `offset` in bits 3:0.
+void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
+{
+	Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | offset);
+}
+// Emits LDREX dest, [base].
+void ARMXEmitter::LDREX(ARMReg dest, ARMReg base)
+{
+	Write32(condition | (25 << 20) | (base << 16) | (dest << 12) | 0xF9F);
+}
+// Emits STREX dest, op, [base]; asserts that `dest` differs from both other registers.
+void ARMXEmitter::STREX(ARMReg dest, ARMReg base, ARMReg op)
+{
+	_assert_msg_(DYNA_REC, (dest != base && dest != op), "STREX dest can't be other two registers");
+	Write32(condition | (24 << 20) | (base << 16) | (dest << 12) | (0xF9 << 4) | op);
+}
+// Emits a DMB with a fixed option field; the current condition is not applied.
+void ARMXEmitter::DMB ()
+{
+	Write32(0xF57FF05E);
+}
+// Emits SVC with the 24-bit immediate taken from `op`.
+void ARMXEmitter::SVC(Operand2 op)
+{
+	Write32(condition | (0x0F << 24) | op.Imm24());
+}
+
+// Load word into `dest`; note the swapped argument order, so `src` lands in
+// bits 19:16 and `dest` in bits 15:12.
+void ARMXEmitter::LDR (ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x41, src, dest, op);}
+// Load halfword into `dest` with an 8-bit immediate split into two nibbles.
+void ARMXEmitter::LDRH(ARMReg dest, ARMReg src, Operand2 op)
+{
+	u8 Imm = op.Imm8();
+	// NOTE(review): (0x05 << 20) sets only bits 22 and 20; bits 24 and 23, which
+	// WriteStoreOp always sets, stay clear here - confirm this is intended.
+	Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xB << 4) | (Imm & 0x0F));
+}
+// Load byte into `dest`; same argument swap as LDR.
+void ARMXEmitter::LDRB(ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x45, src, dest, op);}
+
+// Register-offset load; Index and Add set bits 24 and 23 of the instruction word.
+void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
+{
+	Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | offset);
+}
+// Shared encoder for the multi-register transfers: `dest` goes in bits 19:16,
+// WriteBack sets bit 21, RegList is the register bitmask in bits 15:0.
+void ARMXEmitter::WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList)
+{
+	Write32(condition | (op << 20) | (WriteBack << 21) | (dest << 16) | RegList);
+}
+// Emits a store-multiple (op 0x90) through `dest`; the Regnum registers to
+// transfer follow as variadic arguments.
+void ARMXEmitter::STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)
+{
+	u16 RegList = 0;
+	u8 Reg;
+	int i;
+	va_list vl;
+	va_start(vl, Regnum);
+	for (i=0;i<Regnum;i++)
+	{
+		// Each argument is a register number; set its bit in the list
+		Reg = va_arg(vl, u32);
+		RegList |= (1 << Reg);
+	}
+	va_end(vl);
+	WriteRegStoreOp(0x90, dest, WriteBack, RegList);
+}
+// Emits a load-multiple (op 0x89) through `dest`; the Regnum registers to
+// transfer follow as variadic arguments.
+void ARMXEmitter::LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)
+{
+	u16 RegList = 0;
+	u8 Reg;
+	int i;
+	va_list vl;
+	va_start(vl, Regnum);
+	for (i=0;i<Regnum;i++)
+	{
+		// Each argument is a register number; set its bit in the list
+		Reg = va_arg(vl, u32);
+		RegList |= (1 << Reg);
+	}
+	va_end(vl);
+	WriteRegStoreOp(0x89, dest, WriteBack, RegList);
+}
+
+// Converts an ARMReg enumerator into its index within its own register bank.
+// Core registers are returned unchanged, S and D registers are rebased to
+// zero, and a Q register becomes twice its index.
+ARMReg ARMXEmitter::SubBase(ARMReg Reg)
+{
+	if (Reg < S0)
+		return Reg;
+	if (Reg < D0)
+		return (ARMReg)(Reg - S0);
+	if (Reg < Q0)
+		return (ARMReg)(Reg - D0);
+	return (ARMReg)((Reg - Q0) * 2); // Always gets encoded as a double register
+}
+// NEON Specific
+// Emits the integer (NEON) VADD Vd = Vn + Vm for D or Q registers.
+// `Size` selects the element size field (bits 21:20).
+void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VADD(integer)");
+	_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VADD(integer) when CPU doesn't support it");
+
+	bool register_quad = Vd >= Q0;
+
+	// Gets encoded as a double register
+	Vd = SubBase(Vd);
+	Vn = SubBase(Vn);
+	Vm = SubBase(Vm);
+
+	// Register fields: D:Vd = bits 22, 15:12; N:Vn = bits 7, 19:16; M:Vm = bits 5, 3:0.
+	// Bit 6 is Q, so the M bit must be shifted to bit 5 and not onto Q.
+	Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
+		| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \
+		| ((Vm & 0x10) << 1) | (Vm & 0xF));
+
+}
+// Emits the integer (NEON) VSUB Vd = Vn - Vm. Only Q registers are accepted
+// and the Q bit is always set. `Size` selects the element size field (bits 21:20).
+void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to VSUB(integer)");
+	_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VSUB(integer) when CPU doesn't support it");
+
+	// Gets encoded as a double register
+	Vd = SubBase(Vd);
+	Vn = SubBase(Vn);
+	Vm = SubBase(Vm);
+
+	// Register fields: D:Vd = bits 22, 15:12; N:Vn = bits 7, 19:16; M:Vm = bits 5, 3:0.
+	// Bit 6 is Q, so the M bit must be shifted to bit 5 and not onto Q.
+	Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
+		| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \
+		| ((Vm & 0x10) << 1) | (Vm & 0xF));
+
+}
+
+// VFP Specific
+
+// Emits VLDR Dest, [Base, #op] for an S or D register.
+// `op` is a byte offset; it must be a multiple of 4 and is encoded as op / 4.
+void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, u16 op)
+{
+	_assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR"); 
+	_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR");
+	_assert_msg_(DYNA_REC, !(op & 3), "Offset needs to be word aligned");
+	bool single_reg = Dest < D0;
+
+	Dest = SubBase(Dest);
+
+	if (single_reg)
+	{
+		// Single precision: low bit of the index -> bit 22, upper four bits -> bits 15:12
+		Write32(NO_COND | (0x1B << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \
+			| ((Dest & 0x1E) << 11) | (10 << 8) | (op >> 2));	
+
+	}
+	else
+	{
+		// Double precision: top bit of the index -> bit 22, low four bits -> bits 15:12
+		Write32(NO_COND | (0x1B << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \
+			| ((Dest & 0xF) << 12) | (11 << 8) | (op >> 2));	
+	}
+}
+// Emits VSTR Src, [Base, #op] for an S or D register.
+// `op` is a byte offset; it must be a multiple of 4 and is encoded as op / 4.
+void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, u16 op)
+{
+	_assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR");
+	_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR");
+	_assert_msg_(DYNA_REC, !(op & 3), "Offset needs to be word aligned");
+	bool single_reg = Src < D0;
+
+	Src = SubBase(Src);
+
+	if (single_reg)
+	{
+		// Single precision: low bit of the index -> bit 22, upper four bits -> bits 15:12
+		Write32(NO_COND | (0x1B << 23) | ((Src & 0x1) << 22) | (Base << 16) \
+			| ((Src & 0x1E) << 11) | (10 << 8) | (op >> 2));	
+
+	}
+	else
+	{
+		// Double precision: top bit of the index -> bit 22, low four bits -> bits 15:12
+		Write32(NO_COND | (0x1B << 23) | ((Src & 0x10) << 18) | (Base << 16) \
+			| ((Src & 0xF) << 12) | (11 << 8) | (op >> 2));	
+	}
+}
+// Emits a VFP compare of Vd with Vm for S or D registers.
+// The precision is chosen from Vd alone; Vm is assumed to be of the same kind.
+void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid Vd to VCMP");
+	bool single_reg = Vd < D0;
+	
+	Vd = SubBase(Vd);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		// Single precision: the low index bit is the extra bit (22 for Vd, 5 for Vm)
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x34 << 16) | ((Vd & 0x1E) << 11) \
+			| (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
+	}
+	else
+	{
+		// Double precision: the top index bit is the extra bit (22 for Vd, 5 for Vm)
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x34 << 16) | ((Vd & 0xF) << 12) \
+			| (0x2F << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+	}
+}
+// Emits the single-operand VFP compare form for an S or D register
+// (0x35 in bits 21:16 instead of the 0x34 used by the two-register form).
+void ARMXEmitter::VCMP(ARMReg Vd)
+{
+	_assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid Vd to VCMP");
+	bool single_reg = Vd < D0;
+
+	Vd = SubBase(Vd);
+
+	if (single_reg)
+	{
+		// Single precision: low index bit -> bit 22, upper four bits -> bits 15:12
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x35 << 16) | ((Vd & 0x1E) << 11) \
+			| (0x2B << 6));
+	}
+	else
+	{
+		// Double precision: top index bit -> bit 22, low four bits -> bits 15:12
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x35 << 16) | ((Vd & 0xF) << 12) \
+			| (0x2F << 6)); 
+	}
+}
+// Emits VDIV Vd = Vn / Vm for S or D registers.
+// The precision is chosen from Vd alone.
+void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid dest register to VDIV");
+	_assert_msg_(DYNA_REC, Vn < Q0, "Passed invalid Vn to VDIV");
+	_assert_msg_(DYNA_REC, Vm < Q0, "Passed invalid Vm to VDIV");
+	bool single_reg = Vd < D0;
+
+	Vd = SubBase(Vd);
+	Vn = SubBase(Vn);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		// Single precision: the low index bit is the extra bit (22 / 7 / 5)
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | ((Vn & 0x1E) << 15) \
+			| ((Vd & 0x1E) << 11) | (0xA << 8) | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) \
+			| (Vm >> 1));
+	}
+	else
+	{
+		// Double precision: the top index bit is the extra bit (22 / 7 / 5).
+		// The M bit lives in bit 5, hence the shift by 1 for Vm.
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \
+			| ((Vd & 0xF) << 12) | (0xB << 8) | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 1) \
+			| (Vm & 0xF));
+	}
+}
+// Emits VSQRT Vd = sqrt(Vm) for S or D registers.
+// The precision is chosen from Vd alone.
+void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid dest register to VSQRT");
+	_assert_msg_(DYNA_REC, Vm < Q0, "Passed invalid Vm to VSQRT");
+	bool single_reg = Vd < D0;
+
+	Vd = SubBase(Vd);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		// Single precision: the low index bit is the extra bit (22 for Vd, 5 for Vm)
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \
+			| ((Vd & 0x1E) << 11) | (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
+	}
+	else
+	{
+		// Double precision: the top index bit is the extra bit (22 for Vd, 5 for Vm).
+		// Bit 6 is already part of the opcode (0x2F << 6), so M must go to bit 5.
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \
+			| ((Vd & 0xF) << 12) | (0x2F << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+	}
+}
+// VFP and ASIMD
+// Emits VABS Vd = |Vm| for S or D registers (quad registers are rejected).
+// The precision is chosen from Vd alone.
+void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd < Q0, "VABS doesn't currently support Quad reg");
+	_assert_msg_(DYNA_REC, Vd >= S0, "VABS doesn't support ARM Regs");
+	bool single_reg = Vd < D0;
+
+	Vd = SubBase(Vd);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		// Single precision: the low index bit of Vd is the D bit, which lives
+		// in bit 22 (bit 6 belongs to the 0xAC opcode field).
+		Write32(NO_COND | (0xEB << 20) | ((Vd & 0x1) << 22) | ((Vd & 0x1E) << 11) \
+			| (0xAC << 4) | ((Vm & 0x1) << 5) | (Vm >> 1));
+	}
+	else
+	{
+		// Double precision: the top index bit is the extra bit (22 for Vd, 5 for Vm)
+		Write32(NO_COND | (0xEB << 20) | ((Vd & 0x10) << 18) | ((Vd & 0xF) << 12) \
+			| (0xBC << 4) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+	}
+}
+// Emits the floating-point VADD Vd = Vn + Vm.
+// S and D registers use the VFP encoding; Q registers use the NEON encoding
+// and require cpu_info.bNEON. The register kind is chosen from Vd alone.
+void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VADD");
+	_assert_msg_(DYNA_REC, Vn >= S0, "Passed invalid Vn to VADD");
+	_assert_msg_(DYNA_REC, Vm >= S0, "Passed invalid Vm to VADD");
+	bool single_reg = Vd < D0;
+	bool double_reg = Vd < Q0;
+
+	Vd = SubBase(Vd);
+	Vn = SubBase(Vn);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		// Single precision: the low index bit is the extra bit (22 / 7 / 5)
+		Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \
+			| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
+			| ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1));
+	}
+	else
+	{
+		if (double_reg)
+		{
+			// Double precision: the top index bit is the extra bit (22 / 7 / 5).
+			// Bit 6 distinguishes VADD from VSUB, so M must go to bit 5.
+			Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \
+				| ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \
+				| ((Vn & 0x10) << 3) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+		}
+		else
+		{
+			_assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VADD with Quad Reg without support!");
+			// Quad: bit 6 is the Q bit, M goes to bit 5
+			Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \
+				| ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) \
+				| (1 << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+		}
+	}
+}
+// Emits the floating-point VSUB Vd = Vn - Vm.
+// S and D registers use the VFP encoding; Q registers use the NEON encoding
+// and require cpu_info.bNEON. The register kind is chosen from Vd alone.
+void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VSUB");
+	_assert_msg_(DYNA_REC, Vn >= S0, "Passed invalid Vn to VSUB");
+	_assert_msg_(DYNA_REC, Vm >= S0, "Passed invalid Vm to VSUB");
+	bool single_reg = Vd < D0;
+	bool double_reg = Vd < Q0;
+
+	Vd = SubBase(Vd);
+	Vn = SubBase(Vn);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		// Single precision: the low index bit is the extra bit (22 / 7 / 5);
+		// bit 6 selects subtraction.
+		Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \
+			| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
+			| ((Vn & 0x1) << 7) | (1 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
+	}
+	else
+	{
+		if (double_reg)
+		{
+			// Double precision: the top index bit is the extra bit (22 / 7 / 5).
+			// Bit 6 is the subtract bit, so M must go to bit 5.
+			Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \
+				| ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \
+				| ((Vn & 0x10) << 3) | (1 << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+		}
+		else
+		{
+			_assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VSUB with Quad Reg without support!");
+			// Quad: bit 21 selects subtraction, bit 6 is the Q bit, M goes to bit 5
+			Write32((0xF2 << 24) | (1 << 21) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \
+				| ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) \
+				| (1 << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+		}
+	}
+}
+
+// Moves the core register `Src` into one half of the D (or Q) register `Dest`;
+// `high` sets bit 21 of the instruction word to select the half.
+void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high)
+{
+	_assert_msg_(DYNA_REC, Src < S0, "This VMOV doesn't support SRC other than ARM Reg");
+	_assert_msg_(DYNA_REC, Dest >= D0, "This VMOV doesn't support DEST other than VFP");
+
+	Dest = SubBase(Dest);
+
+	// Low four bits of the destination index -> bits 19:16, its top bit -> bit 7
+	Write32(NO_COND | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \
+		| (11 << 8) | ((Dest & 0x10) << 3) | (1 << 4));
+}
+// General register move. Handled combinations:
+//   - core register -> S register and S register -> core register
+//   - S -> S, D -> D and Q -> Q (Q requires cpu_info.bNEON)
+// Moves between a core register and a D/Q register, and core -> core moves,
+// trigger an assertion instead.
+void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
+{
+	if (Dest > R15)
+	{
+		if (Src < S0)
+		{
+			if (Dest < D0)
+			{
+				// Moving to a Neon register FROM ARM Reg
+				Dest = (ARMReg)(Dest - S0); 
+				Write32(NO_COND | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \
+						| (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4));
+				return;
+			}
+			else
+			{
+				// Move 64bit from Arm reg
+				_assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM to NEON");
+				return;
+			}
+		}
+		// Dest and Src are both VFP/NEON registers: handled after this if/else
+	}
+	else
+	{
+		if (Src > R15)
+		{
+			if (Src < D0)
+			{
+				// Moving to ARM Reg from Neon Register
+				Src = (ARMReg)(Src - S0);
+				Write32(NO_COND | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \
+						| (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4));
+				return;
+			}
+			else
+			{
+				// Move 64bit To Arm reg
+				_assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM From NEON");
+				return;
+			}
+		}
+		else
+		{
+			// Move Arm reg to Arm reg
+			// Note: there is no return here, so execution continues below after the assert
+			_assert_msg_(DYNA_REC, false, "VMOV doesn't support moving ARM registers");
+		}
+	}
+	// Moving NEON registers
+	// Sizes in 32-bit words: 1 for S, 2 for D, 4 for Q
+	int SrcSize = Src < D0 ? 1 : Src < Q0 ? 2 : 4;
+	int DestSize = Dest < D0 ? 1 : Dest < Q0 ? 2 : 4;
+	bool Single = DestSize == 1;
+	bool Quad = DestSize == 4;
+
+	_assert_msg_(DYNA_REC, SrcSize == DestSize, "VMOV doesn't support moving different register sizes");
+
+	Dest = SubBase(Dest);
+	Src = SubBase(Src);
+
+	if (Single)
+	{
+		Write32(NO_COND | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \
+				| (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1));
+	}
+	else
+	{
+		// Double and quad
+		if (Quad)
+		{
+			_assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use quad registers when you don't support ASIMD."); 
+			// Gets encoded as a Double register
+			// Src is placed in both source register fields of the instruction
+			Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (2 << 20) | ((Src & 0xF) << 16) \
+				| ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (1 << 6) \
+				| ((Src & 0x10) << 1) | (1 << 4) | (Src & 0xF));
+
+		}
+		else
+		{
+			Write32(NO_COND | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \
+				| (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF));
+		}
+	}
+}
+
+}
--- a/Source/Core/Common/Src/ArmEmitter.h
+++ b/Source/Core/Common/Src/ArmEmitter.h
@ -0,0 +1,587 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
+
+#ifndef _DOLPHIN_ARM_CODEGEN_
+#define _DOLPHIN_ARM_CODEGEN_
+
+#include "Common.h"
+#include "MemoryUtil.h"
+#if defined(__SYMBIAN32__) || defined(PANDORA)
+#include <signal.h>
+#endif
+
+#undef _IP
+#undef R0
+#undef _SP
+#undef _LR
+#undef _PC
+
+namespace ArmGen
+{
+// Register identifiers used by the emitter. Core registers, VFP single and
+// double precision registers and ASIMD quad registers share one numbering
+// space: enumerators without an explicit value continue from the previous
+// one, so S0 follows _PC (15) and the S, D and Q banks are laid out back to
+// back in that order. Code elsewhere in this patch (e.g. VMOV) compares
+// against R15/D0/Q0 and subtracts S0, so the order must not be changed.
+enum ARMReg
+{
+	// GPRs
+	R0 = 0, R1, R2, R3, R4, R5,
+	R6, R7, R8, R9, R10, R11,
+
+	// SPRs
+	// R13 - R15 are SP, LR, and PC.
+	// Almost always referred to by name instead of register number
+	R12 = 12, R13 = 13, R14 = 14, R15 = 15,
+	_IP = 12, _SP = 13, _LR = 14, _PC = 15,
+
+
+	// VFP single precision registers
+	S0, S1, S2, S3, S4, S5, S6,
+	S7, S8, S9, S10, S11, S12, S13,
+	S14, S15, S16, S17, S18, S19, S20,
+	S21, S22, S23, S24, S25, S26, S27,
+	S28, S29, S30, S31,
+
+	// VFP Double Precision registers
+	D0, D1, D2, D3, D4, D5, D6, D7,
+	D8, D9, D10, D11, D12, D13, D14, D15,
+	D16, D17, D18, D19, D20, D21, D22, D23,
+	D24, D25, D26, D27, D28, D29, D30, D31,
+	
+	// ASIMD Quad-Word registers
+	Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
+	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
+	// Sentinel that does not correspond to any register above.
+	INVALID_REG = 0xFFFFFFFF
+};
+
+// Condition codes. ARMXEmitter's constructors store the selected code shifted
+// left by 28 (see `condition = CC_AL << 28`), i.e. the enumerator value is the
+// 4-bit condition field of an instruction word.
+enum CCFlags
+{
+	CC_EQ = 0, // Equal
+	CC_NEQ, // Not equal
+	CC_CS, // Carry Set
+	CC_CC, // Carry Clear
+	CC_MI, // Minus (Negative)
+	CC_PL, // Plus
+	CC_VS, // Overflow
+	CC_VC, // No Overflow
+	CC_HI, // Unsigned higher
+	CC_LS, // Unsigned lower or same
+	CC_GE, // Signed greater than or equal
+	CC_LT, // Signed less than
+	CC_GT, // Signed greater than
+	CC_LE, // Signed less than or equal
+	CC_AL, // Always (unconditional) 14
+	CC_HS = CC_CS, // Alias of CC_CS  Unsigned higher or same
+	CC_LO = CC_CC, // Alias of CC_CC  Unsigned lower
+};
+// Pre-shifted "always" condition: the same bit pattern as CC_AL << 28.
+const u32 NO_COND = 0xE0000000;
+
+// Shift kinds accepted by Operand2. ST_LSL..ST_ROR are OR-ed into the operand
+// as `Shift << 5` by Operand2::IMMSR()/RSR(). ST_RRX (4) is never encoded
+// directly: the immediate-shift Operand2 constructor rewrites it to ST_ROR,
+// and the register-shift constructor rejects it.
+enum ShiftType
+{
+	ST_LSL = 0,
+	ST_ASL = 0, // Alias of ST_LSL
+	ST_LSR = 1,
+	ST_ASR = 2,
+	ST_ROR = 3,
+	ST_RRX = 4
+};
+// Element-size selector taken by the NEON-only VADD/VSUB overloads of
+// ARMXEmitter. NOTE(review): the names suggest 8/16/32/64-bit integer lanes;
+// confirm against the VADD/VSUB implementation.
+enum IntegerSize
+{
+	I_I8 = 0, 
+	I_I16,
+	I_I32,
+	I_I64
+};
+
+// Number of general purpose registers.
+// NOTE(review): 13 presumably counts R0-R12 (everything below SP/LR/PC) -
+// confirm against the users of NUMGPRs.
+enum
+{
+	NUMGPRs = 13,
+};
+
+class ARMXEmitter;
+
+// Discriminator for what an Operand2 currently holds. Operand2::GetData()
+// handles TYPE_IMM, TYPE_REG, TYPE_IMMSREG and TYPE_RSR; TYPE_MEM falls into
+// its "invalid type" default branch.
+enum OpType
+{
+	TYPE_IMM = 0, // Immediate value
+	TYPE_REG,     // Plain register
+	TYPE_IMMSREG, // Register shifted by an immediate amount
+	TYPE_RSR,     // Register shifted by a register
+	TYPE_MEM
+};
+
+// This is no longer a proper operand2 class. Need to split up.
+// Describes the flexible operand of an ARM instruction: an immediate (with an
+// optional rotation), a plain register, a register shifted by an immediate,
+// or a register shifted by another register. The accessors return the
+// operand's bits already positioned for OR-ing into an instruction word.
+class Operand2
+{
+	friend class ARMXEmitter;
+protected:
+	u32 Value;
+
+private:
+	OpType Type;
+
+	// IMM types
+	u8	Rotation; // Only for u8 values
+
+	// Register types
+	u8 IndexOrShift;
+	ShiftType Shift;
+public:
+	OpType GetType()
+	{
+		return Type;
+	}
+	// Default-constructs an immediate zero so that no field is ever read
+	// while uninitialized.
+	Operand2()
+	{
+		Type = TYPE_IMM;
+		Value = 0;
+		Rotation = 0;
+		IndexOrShift = 0;
+		Shift = ST_LSL;
+	}
+	// Immediate (or, with an explicit type, any operand whose payload is a
+	// raw 32-bit value).
+	Operand2(u32 imm, OpType type = TYPE_IMM)
+	{
+		Type = type;
+		Value = imm;
+		Rotation = 0;
+	}
+
+	// Plain register operand.
+	Operand2(ARMReg Reg)
+	{
+		Type = TYPE_REG;
+		Value = Reg;
+		Rotation = 0;
+	}
+	// 8-bit immediate with a rotation.
+	Operand2(u8 imm, u8 rotation)
+	{
+		Type = TYPE_IMM;
+		Value = imm;
+		Rotation = rotation;
+	}
+	Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR
+	{
+		Type = TYPE_RSR;
+		_assert_msg_(DYNA_REC, type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount");
+		IndexOrShift = shift;
+		Shift = type;
+		Value = base;
+	}
+
+	Operand2(u8 shift, ShiftType type, ARMReg base)// For IMM shifted register
+	{
+		// The 5-bit shift field encodes a shift of 32 as 0. Remember that the
+		// caller asked for 32, so that LSR/ASR #32 are not mistaken for
+		// "no shift" and silently turned into LSL #0 below.
+		const bool shiftBy32 = (shift == 32);
+		if (shiftBy32)
+			shift = 0;
+		switch (type)
+		{
+		case ST_LSL:
+			_assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSL %u", shift);
+			break;
+		case ST_LSR:
+			_assert_msg_(DYNA_REC, shift <= 32, "Invalid Operand2: LSR %u", shift);
+			// LSR #0 is a no-op and is encoded as LSL #0; LSR #32 keeps the
+			// LSR type with a zero shift field.
+			if (!shift && !shiftBy32)
+				type = ST_LSL;
+			break;
+		case ST_ASR:
+			_assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: ASR %u", shift);
+			// Same rule as LSR: only a requested shift of 0 becomes LSL #0.
+			if (!shift && !shiftBy32)
+				type = ST_LSL;
+			break;
+		case ST_ROR:
+			_assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: ROR %u", shift);
+			if (!shift)
+				type = ST_LSL;
+			break;
+		case ST_RRX:
+			_assert_msg_(DYNA_REC, shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount");
+			// RRX is encoded as ROR with a zero shift field.
+			type = ST_ROR;
+			break;
+		}
+		IndexOrShift = shift;
+		Shift = type;
+		Value = base;
+		Type = TYPE_IMMSREG;
+	}
+	// Returns the encoded operand for whichever kind this object holds.
+	const u32 GetData()
+	{
+		switch(Type)
+		{
+		case TYPE_IMM:
+			return Imm12Mod(); // This'll need to be changed later
+		case TYPE_REG:
+			return Rm();
+		case TYPE_IMMSREG:
+			return IMMSR();
+		case TYPE_RSR:
+			return RSR();
+		default:
+			_assert_msg_(DYNA_REC, false, "GetData with Invalid Type");
+			return 0;
+		}
+	}
+	const u32 IMMSR() // IMM shifted register
+	{
+		// Must be a comparison: the previous `Type = TYPE_IMMSREG` always
+		// passed and overwrote the operand's type as a side effect.
+		_assert_msg_(DYNA_REC, Type == TYPE_IMMSREG, "IMMSR must be imm shifted register");
+		return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value);
+	}
+	const u32 RSR() // Register shifted register
+	{
+		_assert_msg_(DYNA_REC, Type == TYPE_RSR, "RSR must be RSR Of Course");
+		return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value;
+	}
+	const u32 Rm()
+	{
+		_assert_msg_(DYNA_REC, Type == TYPE_REG, "Rm must be with Reg");
+		return Value;
+	}
+
+	const u32 Imm5()
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm5 not IMM value");
+		return ((Value & 0x0000001F) << 7);
+	}
+	const u32 Imm8()
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8 not IMM value");
+		return Value & 0xFF;
+	}
+	const u32 Imm8Rot() // IMM8 with Rotation
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value");
+		// Rotation is shifted left by 7 below, so it is expressed in bits and
+		// must be even and below 32: none of the bits in 0xE1 may be set.
+		// (The previous `!= 0` test rejected every valid rotation.)
+		_assert_msg_(DYNA_REC, (Rotation & 0xE1) == 0, "Invalid Operand2: immediate rotation %u", Rotation);
+		return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF);
+	}
+	const u32 Imm12()
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12 not IMM");
+		return (Value & 0x00000FFF);
+	}
+
+	const u32 Imm12Mod()
+	{
+		// This is a IMM12 with the top four bits being rotation and the
+		// bottom eight being a IMM. This is for instructions that need to
+		// expand a 8bit IMM to a 32bit value and gives you some rotation as
+		// well.
+		// Each rotation rotates to the right by 2 bits
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12Mod not IMM");
+		return ((Rotation & 0xF) << 8) | (Value & 0xFF);
+	}
+	// Splits the low 16 bits into the imm4:imm12 layout (bits 16-19 and 0-11).
+	const u32 Imm16()
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM");
+		return ( (Value & 0xF000) << 4) | (Value & 0x0FFF);
+	}
+	const u32 Imm16Low()
+	{
+		return Imm16();
+	}
+	const u32 Imm16High() // Returns high 16bits
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM");
+		return ( ((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF);
+	}
+	const u32 Imm24()
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm24 not IMM");
+		// NOTE(review): this mask keeps 28 bits although the name says 24;
+		// left unchanged here, confirm against the callers.
+		return (Value & 0x0FFFFFFF);
+	}
+	// NEON and ASIMD specific
+	const u32 Imm8ASIMD()
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8ASIMD not IMM");
+		return  ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF);
+	}
+	const u32 Imm8VFP()
+	{
+		_assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8VFP not IMM");
+		return ((Value & 0xF0) << 12) | (Value & 0xF);
+	}
+};
+
+// Use these when you don't know if an imm can be represented as an operand2.
+// This lets you generate both an optimal and a fallback solution by checking
+// the return value, which will be false if these fail to find a Operand2 that
+// represents your 32-bit imm value.
+bool TryMakeOperand2(u32 imm, Operand2 &op2);
+bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse);
+bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated);
+
+// Wraps a register as a TYPE_REG operand.
+inline Operand2 R(ARMReg Reg)
+{
+	return Operand2(Reg, TYPE_REG);
+}
+// Wraps a 32-bit value as a TYPE_IMM operand.
+inline Operand2 IMM(u32 Imm)
+{
+	return Operand2(Imm, TYPE_IMM);
+}
+// Wraps a pointer, converted to a 32-bit value, as a TYPE_IMM operand.
+inline Operand2 Mem(void *ptr)
+{
+	return Operand2((u32)ptr, TYPE_IMM);
+}
+//usage: struct {int e;} s; STRUCT_OFF(s,e)
+#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str)))
+
+
+// Record describing a branch that was emitted before its destination was
+// known; it is the return type of B()/B_CC()/BL()/BL_CC() and the argument of
+// ARMXEmitter::SetJumpTarget().
+struct FixupBranch
+{
+	u8 *ptr; // NOTE(review): presumably the address of the branch instruction to patch - confirm in SetJumpTarget
+	u32 condition; // Remembers our condition at the time
+	int type; //0 = B 1 = BL
+};
+
+typedef const u8* JumpTarget;
+
+// Writes ARM instruction words into a caller-supplied buffer. `code` is the
+// current write position; every instruction is emitted through Write32(),
+// which stores one 32-bit word and advances `code` by four bytes.
+class ARMXEmitter
+{
+	friend struct OpArg;  // for Write8 etc. NOTE(review): OpArg and Write8 are not declared in this header - looks like a leftover from the x86 emitter, confirm
+private:
+	u8 *code, *startcode;
+	u8 *lastCacheFlushEnd;
+	u32 condition; // Condition code already shifted into bits 28-31 (see the constructors)
+
+	void WriteStoreOp(u32 op, ARMReg dest, ARMReg src, Operand2 op2);
+	void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList);
+	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2);
+	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2);
+	void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2);
+
+	void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
+	
+	// New Ops
+	void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false);
+
+protected:
+	// Stores one instruction word at the write position and advances it.
+	inline void Write32(u32 value) {*(u32*)code = value; code+=4;}
+
+public:
+	// Creates an emitter with no buffer attached; the condition defaults to
+	// "always".
+	ARMXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
+		condition = CC_AL << 28;
+	}
+	// Creates an emitter that starts writing at code_ptr.
+	ARMXEmitter(u8 *code_ptr) {
+		code = code_ptr;
+		lastCacheFlushEnd = code_ptr;
+		startcode = code_ptr;
+		condition = CC_AL << 28;
+	}
+	virtual ~ARMXEmitter() {}
+
+	void SetCodePtr(u8 *ptr);
+	void ReserveCodeSpace(u32 bytes);
+	const u8 *AlignCode16();
+	const u8 *AlignCodePage();
+	const u8 *GetCodePtr() const;
+	void FlushIcache();
+	void FlushIcacheSection(u8 *start, u8 *end);
+	u8 *GetWritableCodePtr();
+
+	void SetCC(CCFlags cond = CC_AL);
+
+	// Special purpose instructions
+
+	// Dynamic Endian Switching
+	void SETEND(bool BE);
+	// Debug Breakpoint
+	void BKPT(u16 arg);
+
+	// Hint instruction
+	void YIELD();
+	
+	// Do nothing
+	void NOP(int count = 1); // nop padding. NOTE(review): the old "fast nop slides, for amd and intel" TODO came from the x86 emitter and does not apply to ARM
+	
+#ifdef CALL
+#undef CALL
+#endif
+
+	// Branching
+	FixupBranch B();
+	FixupBranch B_CC(CCFlags Cond);
+	void B_CC(CCFlags Cond, const void *fnptr);
+	FixupBranch BL();
+	FixupBranch BL_CC(CCFlags Cond);
+	void SetJumpTarget(FixupBranch const &branch);
+	
+	void B (const void *fnptr);
+	void B (ARMReg src);
+	void BL(const void *fnptr);
+	void BL(ARMReg src);
+
+	void PUSH(const int num, ...);
+	void POP(const int num, ...);
+
+	// New Data Ops
+	void AND (ARMReg Rd, ARMReg Rn, Operand2 Rm);
+	void ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm);
+	void EOR (ARMReg dest, ARMReg src, Operand2 op2);
+	void EORS(ARMReg dest, ARMReg src, Operand2 op2);
+	void SUB (ARMReg dest, ARMReg src, Operand2 op2);
+	void SUBS(ARMReg dest, ARMReg src, Operand2 op2);
+	void RSB (ARMReg dest, ARMReg src, Operand2 op2);
+	void RSBS(ARMReg dest, ARMReg src, Operand2 op2);
+	void ADD (ARMReg dest, ARMReg src, Operand2 op2);
+	void ADDS(ARMReg dest, ARMReg src, Operand2 op2);
+	void ADC (ARMReg dest, ARMReg src, Operand2 op2);
+	void ADCS(ARMReg dest, ARMReg src, Operand2 op2);
+	void LSL (ARMReg dest, ARMReg src, Operand2 op2);
+	void LSL (ARMReg dest, ARMReg src, ARMReg op2);
+	void LSLS(ARMReg dest, ARMReg src, Operand2 op2);
+	void LSLS(ARMReg dest, ARMReg src, ARMReg op2);
+	void SBC (ARMReg dest, ARMReg src, Operand2 op2);
+	void SBCS(ARMReg dest, ARMReg src, Operand2 op2);
+	void REV (ARMReg dest, ARMReg src);
+	void REV16 (ARMReg dest, ARMReg src);
+	void RSC (ARMReg dest, ARMReg src, Operand2 op2);
+	void RSCS(ARMReg dest, ARMReg src, Operand2 op2);
+	void TST (             ARMReg src, Operand2 op2);
+	void TEQ (             ARMReg src, Operand2 op2);
+	void CMP (             ARMReg src, Operand2 op2);
+	void CMN (             ARMReg src, Operand2 op2);
+	void ORR (ARMReg dest, ARMReg src, Operand2 op2);
+	void ORRS(ARMReg dest, ARMReg src, Operand2 op2);
+	void MOV (ARMReg dest,             Operand2 op2);
+	void MOVS(ARMReg dest,             Operand2 op2);
+	void BIC (ARMReg dest, ARMReg src, Operand2 op2);   // BIC = ANDN
+	void BICS(ARMReg dest, ARMReg src, Operand2 op2);
+	void MVN (ARMReg dest,             Operand2 op2);
+	void MVNS(ARMReg dest,             Operand2 op2);
+	void MOVW(ARMReg dest, 			   Operand2 op2);
+	void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false);
+
+	// UDIV and SDIV are only available on CPUs that have 
+	// the idiva hardware capacity
+	void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);
+	void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);
+
+	void MUL (ARMReg dest,	ARMReg src, ARMReg op2);
+	void MULS(ARMReg dest,	ARMReg src, ARMReg op2);
+	
+	void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
+	void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
+
+	void SXTB(ARMReg dest, ARMReg op2);
+	void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0);
+	void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0);
+	// Using just MSR here messes with our defines on the PPC side of stuff (when this code was in dolphin...)
+	// Just need to put an underscore here, bit annoying.
+	void _MSR (bool nzcvq, bool g, Operand2 op2);
+	void _MSR (bool nzcvq, bool g, ARMReg src	   );
+	void MRS  (ARMReg dest);
+
+	// Memory load/store operations
+	void LDR (ARMReg dest, ARMReg src, Operand2 op2 = 0);
+	// Offset adds to the base register in LDR
+	void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
+	void LDRH(ARMReg dest, ARMReg src, Operand2 op = 0); 
+	void LDRB(ARMReg dest, ARMReg src, Operand2 op2 = 0);
+	void STR (ARMReg dest, ARMReg src, Operand2 op2 = 0);
+	// Offset adds on to the destination register in STR
+	void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
+
+	void STRB(ARMReg dest, ARMReg src, Operand2 op2 = 0);
+	void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
+	void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
+	
+	// Exclusive Access operations
+	void LDREX(ARMReg dest, ARMReg base);
+	// dest contains the result if the instruction managed to store the value
+	void STREX(ARMReg dest, ARMReg base, ARMReg op);
+	void DMB ();
+	void SVC(Operand2 op);
+
+	// NEON and ASIMD instructions
+	// None of these will be created with conditional since ARM
+	// is deprecating conditional execution of ASIMD instructions.
+	// ASIMD instructions don't even have a conditional encoding.
+
+	// Subtracts the base from the register to give us the real one
+	ARMReg SubBase(ARMReg Reg);	
+	// NEON Only
+	void VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
+	void VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
+		
+	// VFP Only
+	void VLDR(ARMReg Dest, ARMReg Base, u16 op);
+	void VSTR(ARMReg Src,  ARMReg Base, u16 op);
+	void VCMP(ARMReg Vd, ARMReg Vm);
+	// Compares against zero
+	void VCMP(ARMReg Vd);
+	void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm);
+	void VSQRT(ARMReg Vd, ARMReg Vm);
+	
+	// NEON and VFP
+	void VABS(ARMReg Vd, ARMReg Vm);
+	void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm);
+	void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm);
+
+	void VMOV(ARMReg Dest, ARMReg Src, bool high);
+	void VMOV(ARMReg Dest, ARMReg Src);
+
+	void QuickCallFunction(ARMReg scratchreg, void *func);
+	// Utility functions
+	void MOVI2R(ARMReg reg, u32 val, bool optimize = true);
+	void ARMABI_MOVI2M(Operand2 op, Operand2 val);	
+};  // class ARMXEmitter
+
+
+// Everything that needs to generate ARM code should inherit from this.
+// You get memory management for free, plus, you can use all the MOV etc functions without
+// having to prefix them with gen-> or something similar.
+class ARMXCodeBlock : public ARMXEmitter
+{
+protected:
+	u8 *region;         // Start of the executable region owned by this block (NULL when none)
+	size_t region_size; // Size of `region` in bytes
+
+public:
+	ARMXCodeBlock() : region(NULL), region_size(0) {}
+	virtual ~ARMXCodeBlock() { if (region) FreeCodeSpace(); }
+
+	// Call this before you generate any code.
+	void AllocCodeSpace(int size)
+	{
+		region_size = size;
+		region = (u8*)AllocateExecutableMemory(region_size);
+		SetCodePtr(region);
+	}
+
+	// Always clear code space with breakpoints, so that if someone accidentally executes
+	// uninitialized, it just breaks into the debugger.
+	void ClearCodeSpace() 
+	{
+		// ARM: 0xE1200070 = BKPT #0. The x86 filler byte 0xCC used previously
+		// does not form a breakpoint instruction on ARM.
+		const u32 breakpoint = 0xE1200070;
+		const size_t words = region_size / sizeof(u32);
+		u32 *fill = (u32*)region;
+		for (size_t i = 0; i < words; i++)
+			fill[i] = breakpoint;
+		// Trailing bytes that do not make up a whole instruction word.
+		memset(region + words * sizeof(u32), 0xCC, region_size % sizeof(u32));
+		ResetCodePtr();
+	}
+
+	// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
+	void FreeCodeSpace()
+	{
+		FreeMemoryPages(region, region_size);
+		region = NULL;
+		region_size = 0;
+	}
+
+	// Returns true when ptr lies inside [region, region + region_size).
+	bool IsInSpace(u8 *ptr)
+	{
+		return ptr >= region && ptr < region + region_size;
+	}
+
+	// Cannot currently be undone. Will write protect the entire code region.
+	// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
+	void WriteProtect()
+	{
+		WriteProtectMemory(region, region_size, true);		
+	}
+
+	// Moves the write position back to the start of the region.
+	void ResetCodePtr()
+	{
+		SetCodePtr(region);
+	}
+
+	// Bytes remaining between the current write position and the end of the region.
+	size_t GetSpaceLeft() const
+	{
+		return region_size - (GetCodePtr() - region);
+	}
+};
+
+}  // namespace
+
+#endif // _DOLPHIN_ARM_CODEGEN_
--- a/Source/Core/Common/Src/CPUDetect.h
+++ b/Source/Core/Common/Src/CPUDetect.h
@ -25,7 +25,8 @@ enum CPUVendor
 {
 	VENDOR_INTEL = 0,
 	VENDOR_AMD = 1,
-	VENDOR_OTHER = 2,
+	VENDOR_ARM = 2,
+	VENDOR_OTHER = 3,
 };

 struct CPUInfo
@ -55,6 +56,26 @@ struct CPUInfo
 	bool bAES;
 	bool bLAHFSAHF64;
 	bool bLongMode;
+	
+	// ARM specific CPUInfo
+	bool bSwp;
+	bool bHalf;
+	bool bThumb;
+	bool bFastMult;
+	bool bVFP;
+	bool bEDSP;
+	bool bThumbEE;
+	bool bNEON;
+	bool bVFPv3;
+	bool bTLS;
+	bool bVFPv4;
+	bool bIDIVa;
+	bool bIDIVt;
+	bool bArmV7;  // enable MOVT, MOVW etc
+	
+	// ARMv8 specific
+	bool bFP;
+	bool bASIMD;

 	// Call Detect()
 	explicit CPUInfo();
--- a/Source/Core/Common/Src/Common.h
+++ b/Source/Core/Common/Src/Common.h
@ -133,7 +133,9 @@ private:
 // wxWidgets does not have a true dummy macro for this.
 #define _trans(a) a

-#if defined __GNUC__
+#if defined _M_GENERIC
+#  define _M_SSE 0x0
+#elif defined __GNUC__
 # if defined __SSE4_2__
 #  define _M_SSE 0x402
 # elif defined __SSE4_1__
@ -144,7 +146,7 @@ private:
 #  define _M_SSE 0x300
 # endif
 #elif (_MSC_VER >= 1500) || __INTEL_COMPILER // Visual Studio 2008
-# define _M_SSE 0x402
+#  define _M_SSE 0x402
 #endif

 // Host communication.
--- a/Source/Core/Common/Src/CommonFuncs.h
+++ b/Source/Core/Common/Src/CommonFuncs.h
@ -35,7 +35,7 @@ template<> struct CompileTimeAssert<true> {};
 #define b32(x)  (b16(x) | (b16(x) >>16) )
 #define ROUND_UP_POW2(x)	(b32(x - 1) + 1)

-#if defined __GNUC__ && !defined __SSSE3__
+#if defined __GNUC__ && !defined __SSSE3__ && !defined _M_GENERIC
 #include <emmintrin.h>
 static __inline __m128i __attribute__((__always_inline__))
 _mm_shuffle_epi8(__m128i a, __m128i mask)
@ -60,6 +60,8 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
 // go to debugger mode
 	#ifdef GEKKO
 		#define Crash()
+	#elif defined _M_GENERIC
+		#define Crash() { exit(1); }
 	#else
 		#define Crash() {asm ("int $3");}
 	#endif
@ -136,6 +138,15 @@ inline u8 swap8(u8 _data) {return _data;}
 inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
 inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
 inline u64 swap64(u64 _data) {return _byteswap_uint64(_data);}
+#elif _M_ARM
+#ifdef ANDROID
+#undef swap16
+#undef swap32
+#undef swap64
+#endif
+// ARM byte-swap helpers implemented with the rev16/rev instructions via
+// inline asm. swap16 widens to 32 bits for the instruction and returns only
+// the low halfword of the result.
+inline u16 swap16 (u16 _data) { u32 data = _data; __asm__ ("rev16 %0, %1\n" : "=l" (data) : "l" (data)); return (u16)data;} 
+inline u32 swap32 (u32 _data) {__asm__ ("rev %0, %1\n" : "=l" (_data) : "l" (_data)); return _data;} 
+// Byte-swaps each 32-bit half with swap32 and exchanges the two halves.
+inline u64 swap64(u64 _data) {return ((u64)swap32(_data) << 32) | swap32(_data >> 32);}
 #elif __linux__
 inline u16 swap16(u16 _data) {return bswap_16(_data);}
 inline u32 swap32(u32 _data) {return bswap_32(_data);}
@ -161,7 +172,6 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3
 inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);}
 inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);}
 inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);}
-
 }  // Namespace Common

 #endif // _COMMONFUNCS_H_
--- a/Source/Core/Common/Src/CommonPaths.h
+++ b/Source/Core/Common/Src/CommonPaths.h
@ -36,6 +36,9 @@
 	// You can use the File::GetUserPath() util for this
 	#define USERDATA_DIR "Contents/Resources/User"
 	#define DOLPHIN_DATA_DIR "Library/Application Support/Dolphin"
+#elif defined ANDROID
+	#define USERDATA_DIR "user"
+	#define DOLPHIN_DATA_DIR "/sdcard/dolphin-emu"
 #else
 	#define USERDATA_DIR "user"
 	#ifdef USER_DIR
@ -52,6 +55,8 @@
 	#define SYSDATA_DIR "Contents/Resources/Sys"
 	#define SHARED_USER_DIR	File::GetBundleDirectory() + \
 				DIR_SEP USERDATA_DIR DIR_SEP
+#elif defined ANDROID
+	#define SYSDATA_DIR "/sdcard/dolphin-emu"	
 #else
 	#ifdef DATA_DIR
 		#define SYSDATA_DIR DATA_DIR "sys"
--- a/Source/Core/Common/Src/FPURoundMode.h
+++ b/Source/Core/Common/Src/FPURoundMode.h
@ -0,0 +1,51 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+#ifndef FPU_ROUND_MODE_H_
+#define FPU_ROUND_MODE_H_
+#include "Common.h"
+
+// Platform-neutral interface for controlling floating point rounding and
+// precision and for saving/restoring the SIMD control state. Only the
+// declarations live here; each platform provides its own definitions.
+namespace FPURoundMode
+{
+	// Rounding modes. NOTE(review): presumably the values accepted by
+	// SetRoundMode()/SetSIMDMode() - confirm against the implementations.
+	enum RoundModes
+	{
+		ROUND_NEAR = 0,
+		ROUND_CHOP,
+		ROUND_UP,
+		ROUND_DOWN
+	};
+	// Precision modes. NOTE(review): presumably the values accepted by
+	// SetPrecisionMode(); the names suggest mantissa widths in bits - confirm.
+	enum PrecisionModes {
+		PREC_24 = 0,
+		PREC_53,
+		PREC_64
+	};
+	void SetRoundMode(u32 mode);
+
+	void SetPrecisionMode(u32 mode);
+	
+	void SetSIMDMode(u32 mode);
+
+	/*
+   There are two different flavors of float to int conversion:
+   _mm_cvtps_epi32() and _mm_cvttps_epi32(). The first rounds
+   according to the MXCSR rounding bits. The second one always
+   uses round towards zero.
+ */
+	void SaveSIMDState();
+	void LoadSIMDState();
+	void LoadDefaultSIMDState();
+}
+#endif
--- a/Source/Core/Common/Src/FileUtil.cpp
+++ b/Source/Core/Common/Src/FileUtil.cpp
@ -668,9 +668,10 @@ std::string &GetUserPath(const unsigned int DirIDX, const std::string &newPath)
 		if (File::Exists(ROOT_DIR DIR_SEP USERDATA_DIR))
 			paths[D_USER_IDX] = ROOT_DIR DIR_SEP USERDATA_DIR DIR_SEP;
 		else
-			paths[D_USER_IDX] = std::string(getenv("HOME") ? getenv("HOME") : getenv("PWD")) + DIR_SEP DOLPHIN_DATA_DIR DIR_SEP;
+			paths[D_USER_IDX] = std::string(getenv("HOME") ? 
+				getenv("HOME") : getenv("PWD") ? 
+				getenv("PWD") : "") + DIR_SEP DOLPHIN_DATA_DIR DIR_SEP;
 #endif
-		INFO_LOG(COMMON, "GetUserPath: Setting user directory to %s:", paths[D_USER_IDX].c_str());

 		paths[D_GCUSER_IDX]			= paths[D_USER_IDX] + GC_USER_DIR DIR_SEP;
 		paths[D_WIIROOT_IDX]		= paths[D_USER_IDX] + WII_USER_DIR;
--- a/Source/Core/Common/Src/GenericFPURoundMode.cpp
+++ b/Source/Core/Common/Src/GenericFPURoundMode.cpp
@ -0,0 +1,41 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#include "FPURoundMode.h"
+
+// Generic, do nothing
+namespace FPURoundMode
+{
+	// The generic target has no FPU/SIMD control state to manipulate, so
+	// every entry point is an intentional no-op.
+	void SetRoundMode(u32 mode) {}
+	void SetPrecisionMode(u32 mode) {}
+	void SetSIMDMode(u32 mode) {}
+
+	void SaveSIMDState() {}
+	void LoadSIMDState() {}
+	void LoadDefaultSIMDState() {}
+}
--- a/Source/Core/Common/Src/LogManager.cpp
+++ b/Source/Core/Common/Src/LogManager.cpp
@ -17,6 +17,9 @@

 #include <algorithm>

+#ifdef ANDROID
+#include "Host.h"
+#endif
 #include "LogManager.h"
 #include "ConsoleListener.h"
 #include "Timer.h"
@ -132,7 +135,9 @@ void LogManager::Log(LogTypes::LOG_LEVELS level, LogTypes::LOG_TYPE type,
 		Common::Timer::GetTimeFormatted().c_str(),
 		file, line, level_to_char[(int)level],
 		log->GetShortName(), temp);
-
+#ifdef ANDROID
+	Host_SysMessage(msg);	
+#endif
 	log->Trigger(level, msg);
 }

--- a/Source/Core/Common/Src/MathUtil.cpp
+++ b/Source/Core/Common/Src/MathUtil.cpp
@ -21,13 +21,6 @@
 #include <cmath>
 #include <numeric>

-namespace {
-
-static u32 saved_sse_state = _mm_getcsr();
-static const u32 default_sse_state = _mm_getcsr();
-
-}
-
 namespace MathUtil
 {

@ -114,23 +107,6 @@ u32 ClassifyFloat(float fvalue)

 }  // namespace

-void LoadDefaultSSEState()
-{
-	_mm_setcsr(default_sse_state);
-}
-
-
-void LoadSSEState()
-{
-	_mm_setcsr(saved_sse_state);
-}
-
-
-void SaveSSEState()
-{
-	saved_sse_state = _mm_getcsr();
-}
-
 inline void MatrixMul(int n, const float *a, const float *b, float *result)
 {    
    for (int i = 0; i < n; ++i)
--- a/Source/Core/Common/Src/MathUtil.h
+++ b/Source/Core/Common/Src/MathUtil.h
@ -20,8 +20,8 @@

 #include "Common.h"

-#include <xmmintrin.h>
 #include <vector>
+#include "FPURoundMode.h"

 namespace MathUtil
 {
@ -147,17 +147,6 @@ struct Rectangle
 inline float pow2f(float x) {return x * x;}
 inline double pow2(double x) {return x * x;}

-
-/*
-   There are two different flavors of float to int conversion:
-   _mm_cvtps_epi32() and _mm_cvttps_epi32(). The first rounds
-   according to the MXCSR rounding bits. The second one always
-   uses round towards zero.
- */
-
-void SaveSSEState();
-void LoadSSEState();
-void LoadDefaultSSEState();
 float MathFloatVectorSum(const std::vector<float>&);

 #define ROUND_UP(x, a)		(((x) + (a) - 1) & ~((a) - 1))
--- a/Source/Core/Common/Src/MemArena.cpp
+++ b/Source/Core/Common/Src/MemArena.cpp
@ -27,6 +27,10 @@
 #include <unistd.h>
 #include <cerrno>
 #include <cstring>
+#ifdef ANDROID
+#include <sys/ioctl.h>
+#include <linux/ashmem.h>
+#endif
 #endif

 #if defined(__APPLE__)
@ -34,11 +38,41 @@ static const char* ram_temp_file = "/tmp/gc_mem.tmp";
 #elif !defined(_WIN32) // non OSX unixes
 static const char* ram_temp_file = "/dev/shm/gc_mem.tmp";
 #endif
+#ifdef ANDROID
+#define ASHMEM_DEVICE "/dev/ashmem"
+
+// Opens the ashmem device, tries to label the region with `name`, and sets
+// its size. Returns the open file descriptor on success; on failure returns
+// the negative result of the failing open()/ioctl() call.
+int AshmemCreateFileMapping(const char *name, size_t size)
+{
+	const int fd = open(ASHMEM_DEVICE, O_RDWR);
+	if (fd < 0)
+		return fd;
+
+	// We don't really care if we can't set the name, it is optional
+	ioctl(fd, ASHMEM_SET_NAME, name);
+
+	const int ret = ioctl(fd, ASHMEM_SET_SIZE, size);
+	if (ret >= 0)
+		return fd;
+
+	// Setting the size failed: release the descriptor and report the error.
+	close(fd);
+	NOTICE_LOG(MEMMAP, "Ashmem returned error: 0x%08x", ret);
+	return ret;
+}
+#endif

 void MemArena::GrabLowMemSpace(size_t size)
 {
 #ifdef _WIN32
 	hMemoryMapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, (DWORD)(size), NULL);
+#elif defined(ANDROID)
+	fd = AshmemCreateFileMapping("Dolphin-emu", size);
+	if (fd < 0)
+	{
+		NOTICE_LOG(MEMMAP, "Ashmem allocation failed");
+		return;
+	}
 #else
 	mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
 	fd = open(ram_temp_file, O_RDWR | O_CREAT, mode);
--- a/Source/Core/Common/Src/MemoryUtil.cpp
+++ b/Source/Core/Common/Src/MemoryUtil.cpp
@ -117,9 +117,12 @@ void* AllocateAlignedMemory(size_t size,size_t alignment)
 	void* ptr =  _aligned_malloc(size,alignment);
 #else
 	void* ptr = NULL;
+#ifdef ANDROID
+	ptr = memalign(alignment, size);
+#else
 	if (posix_memalign(&ptr, alignment, size) != 0)
 		ERROR_LOG(MEMMAP, "Failed to allocate aligned memory");
-;
+#endif
 #endif

 	// printf("Mapped memory at %p (size %ld)\n", ptr,
--- a/Source/Core/Common/Src/StdConditionVariable.h
+++ b/Source/Core/Common/Src/StdConditionVariable.h
@ -5,7 +5,7 @@
 #define GCC_VER(x,y,z)	((x) * 10000 + (y) * 100 + (z))
 #define GCC_VERSION GCC_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)

-#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__
+#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ && !ANDROID
 // GCC 4.4 provides <condition_variable>
 #include <condition_variable>
 #else
--- a/Source/Core/Common/Src/StdMutex.h
+++ b/Source/Core/Common/Src/StdMutex.h
@ -5,7 +5,7 @@
 #define GCC_VER(x,y,z)	((x) * 10000 + (y) * 100 + (z))
 #define GCC_VERSION GCC_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)

-#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__
+#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ && !ANDROID
 // GCC 4.4 provides <mutex>
 #include <mutex>
 #else
--- a/Source/Core/Common/Src/StdThread.h
+++ b/Source/Core/Common/Src/StdThread.h
@ -5,7 +5,7 @@
 #define GCC_VER(x,y,z)	((x) * 10000 + (y) * 100 + (z))
 #define GCC_VERSION GCC_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)

-#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__
+#if GCC_VERSION >= GCC_VER(4,4,0) && __GXX_EXPERIMENTAL_CXX0X__ && !ANDROID
 // GCC 4.4 provides <thread>
 #ifndef _GLIBCXX_USE_SCHED_YIELD
 #define _GLIBCXX_USE_SCHED_YIELD
--- a/Source/Core/Common/Src/StringUtil.cpp
+++ b/Source/Core/Common/Src/StringUtil.cpp
@ -245,7 +245,7 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
 	while(1)
 	{
 		const int pos = result.find(src);
-		if (pos == -1) break;
+		if (pos == (int)std::string::npos) break; // find() reports "not found" as npos, never as 16
 		result.replace(pos, src.size(), dest);
 	}
 	return result;
@ -263,25 +263,25 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
 const char HEX2DEC[256] = 
 {
 	/*       0  1  2  3   4  5  6  7   8  9  A  B   C  D  E  F */
-	/* 0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* 1 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* 2 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* 3 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
+	/* 0 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* 1 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* 2 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* 3 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,16,16, 16,16,16,16,

-	/* 4 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* 5 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* 6 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* 7 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+	/* 4 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
+	/* 5 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* 6 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
+	/* 7 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,

-	/* 8 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* 9 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* A */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* B */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+	/* 8 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* 9 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* A */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* B */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,

-	/* C */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* D */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* E */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-	/* F */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
+	/* C */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* D */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* E */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+	/* F */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16
 };

 std::string UriDecode(const std::string & sSrc)
@ -303,8 +303,8 @@ std::string UriDecode(const std::string & sSrc)
 		if (*pSrc == '%')
 		{
 			char dec1, dec2;
-			if (-1 != (dec1 = HEX2DEC[*(pSrc + 1)])
-				&& -1 != (dec2 = HEX2DEC[*(pSrc + 2)]))
+			if (16 != (dec1 = HEX2DEC[*(pSrc + 1)])
+				&& 16 != (dec2 = HEX2DEC[*(pSrc + 2)]))
 			{
 				*pEnd++ = (dec1 << 4) + dec2;
 				pSrc += 3;
--- a/Source/Core/Common/Src/Thread.cpp
+++ b/Source/Core/Common/Src/Thread.cpp
@ -105,7 +105,7 @@ void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask)
 #ifdef __APPLE__
 	thread_policy_set(pthread_mach_thread_np(thread),
 		THREAD_AFFINITY_POLICY, (integer_t *)&mask, 1);
-#elif defined __linux__ || defined BSD4_4
+#elif (defined __linux__ || defined BSD4_4) && !(defined ANDROID)
 	cpu_set_t cpu_set;
 	CPU_ZERO(&cpu_set);
                
--- a/Source/Core/Common/Src/Thread.h
+++ b/Source/Core/Common/Src/Thread.h
@ -33,8 +33,6 @@
 #define INFINITE 0xffffffff
 #endif

-#include <xmmintrin.h>
-
 //for gettimeofday and struct time(spec|val)
 #include <time.h>
 #include <sys/time.h>
--- a/Source/Core/Common/Src/x64ABI.cpp
+++ b/Source/Core/Common/Src/x64ABI.cpp
@ -17,7 +17,7 @@

 #include "Common.h"
 #include "x64Emitter.h"
-#include "ABI.h"
+#include "x64ABI.h"

 using namespace Gen;

--- a/Source/Core/Common/Src/x64ABI.h
+++ b/Source/Core/Common/Src/x64ABI.h
--- a/Source/Core/Common/Src/x64CPUDetect.cpp
+++ b/Source/Core/Common/Src/x64CPUDetect.cpp
@ -30,7 +30,9 @@
 #else

 //#include <config/i386/cpuid.h>
+#ifndef _M_GENERIC
 #include <xmmintrin.h>
+#endif

 #if defined __FreeBSD__
 #include <sys/types.h>
@ -39,7 +41,9 @@
 static inline void do_cpuid(unsigned int *eax, unsigned int *ebx,
 						    unsigned int *ecx, unsigned int *edx)
 {
-#ifdef _LP64
+#if defined _M_GENERIC
+	(*eax) = (*ebx) = (*ecx) = (*edx) = 0;
+#elif defined _LP64
 	// Note: EBX is reserved on Mac OS X and in PIC on Linux, so it has to
 	// restored at the end of the asm block.
 	__asm__ (
--- a/Source/Core/Common/Src/x64Emitter.cpp
+++ b/Source/Core/Common/Src/x64Emitter.cpp
@ -17,7 +17,7 @@

 #include "Common.h"
 #include "x64Emitter.h"
-#include "ABI.h"
+#include "x64ABI.h"
 #include "CPUDetect.h"

 namespace Gen
--- a/Source/Core/Common/Src/x64Emitter.h
+++ b/Source/Core/Common/Src/x64Emitter.h
@ -757,7 +757,7 @@ public:
 		region_size = 0;
 	}

-	bool IsInCodeSpace(u8 *ptr)
+	bool IsInSpace(u8 *ptr) // true when ptr lies in the half-open range [region, region + region_size)
 	{
 		return ptr >= region && ptr < region + region_size;
 	}
--- a/Source/Core/Common/Src/x64FPURoundMode.cpp
+++ b/Source/Core/Common/Src/x64FPURoundMode.cpp
@ -0,0 +1,120 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#include "Common.h"
+#include "FPURoundMode.h"
+
+#ifndef _WIN32
+static const unsigned short FPU_ROUND_NEAR = 0 << 10; // x87 control-word rounding field (bits 10-11): round to nearest
+static const unsigned short FPU_ROUND_DOWN = 1 << 10; // round down
+static const unsigned short FPU_ROUND_UP   = 2 << 10; // round up
+static const unsigned short FPU_ROUND_CHOP = 3 << 10; // chop (truncate)
+static const unsigned short FPU_ROUND_MASK = 3 << 10; // both bits of the rounding field; cleared in SetRoundMode before a new mode is OR'd in
+#include <xmmintrin.h>
+#endif
+
+const u32 MASKS = 0x1F80;  // MXCSR bits 7-12 set: presumably the six SSE exception-mask bits (all exceptions masked) - confirm against the MXCSR layout
+const u32 DAZ = 0x40; // bit 6; presumably the MXCSR denormals-are-zero flag, going by the name. Not referenced in this file.
+const u32 FTZ = 0x8000; // bit 15; presumably the MXCSR flush-to-zero flag, going by the name. Not referenced in this file.
+
+namespace FPURoundMode
+{
+	// MXCSR snapshots, both initialized from _mm_getcsr() when this file's statics are initialized.
+	static u32 saved_sse_state = _mm_getcsr(); // overwritten by SaveSIMDState(), written back by LoadSIMDState()
+	static const u32 default_sse_state = _mm_getcsr(); // never modified; written back by LoadDefaultSIMDState()
+
+	// Sets the x87 FPU rounding mode to mimic the PowerPC's.
+	//
+	// mode: index into the tables below - 0 = nearest, 1 = chop (toward zero),
+	//       2 = up, 3 = down. Only the low two bits are used.
+	//
+	// Only 32-bit x86 builds (_M_IX86) touch the x87 control word; on every
+	// other target this function is a no-op.
+	void SetRoundMode(u32 mode)
+	{
+		#ifdef _M_IX86
+			// This shouldn't really be needed anymore since we use SSE
+		#ifdef _WIN32
+			const int table[4] =
+			{
+				_RC_NEAR,
+				_RC_CHOP,
+				_RC_UP,
+				_RC_DOWN
+			};
+			// The CRT expects (new value, mask). Passing them the other way
+			// round uses the mode bits as the mask, so "nearest" (0) changes
+			// nothing and "up"/"down" only set one of the two field bits.
+			_set_controlfp(table[mode & 3], _MCW_RC);
+		#else
+			const unsigned short table[4] =
+			{
+				FPU_ROUND_NEAR,
+				FPU_ROUND_CHOP,
+				FPU_ROUND_UP,
+				FPU_ROUND_DOWN
+			};
+			// Read-modify-write of the x87 control word: replace only the
+			// rounding-control field and keep every other bit as it was.
+			unsigned short control_word;
+			asm ("fstcw %0" : "=m" (control_word) : );
+			control_word = (control_word & ~FPU_ROUND_MASK) | table[mode & 3];
+			asm ("fldcw %0" : : "m" (control_word));
+		#endif
+		#endif
+	}
+
+	// Sets the x87 FPU precision-control field.
+	//
+	// mode: index into the table below - 0 = 24-bit, 1 = 53-bit, 2 = 64-bit
+	//       (3 selects the same bits as 2). Only the low two bits are used.
+	//       The Windows branch ignores mode and always selects 53-bit.
+	//
+	// Only 32-bit x86 builds (_M_IX86) touch the x87 control word; elsewhere
+	// this function is a no-op.
+	void SetPrecisionMode(u32 mode)
+	{
+		#ifdef _M_IX86
+			// sets the floating-point lib to 53-bit
+			// PowerPC has a 53bit floating pipeline only
+			// eg: sscanf is very sensitive
+		#ifdef _WIN32
+			_control87(_PC_53, _MCW_PC);
+		#else
+			// Precision-control field of the x87 control word (bits 8-9).
+			const unsigned short FPU_PREC_MASK = 3 << 8;
+			const unsigned short table[4] = {
+				0 << 8, // FPU_PREC_24
+				2 << 8, // FPU_PREC_53
+				3 << 8, // FPU_PREC_64
+				3 << 8, // mode 3: same bits as FPU_PREC_64
+			};
+			unsigned short control_word;
+			// fstcw writes its operand, so it must be declared as an output
+			// ("=m"); as an input the compiler may treat it as uninitialized.
+			asm ("fstcw %0" : "=m" (control_word));
+			// Clear the field with the mask itself (indexing table[4] reads
+			// past the end of the array), then insert the requested precision.
+			control_word = (control_word & ~FPU_PREC_MASK) | table[mode & 3];
+			asm ("fldcw %0" : : "m" (control_word));
+		#endif
+		#else
+			//x64 doesn't need this - fpu is done with SSE
+			//but still - set any useful sse options here
+		#endif
+	}
+	// Loads MXCSR with the value selected by mode (expected range 0..3).
+	// The whole register is replaced by the table entry, so bits outside the
+	// entry (DAZ and FTZ, for example) end up cleared.
+	void SetSIMDMode(u32 mode)
+	{
+		// One complete MXCSR value per mode: a 2-bit value shifted to bit 13
+		// (presumably the MXCSR rounding-control field), combined with MASKS.
+		static const u32 mxcsr_by_mode[4] =
+		{
+			MASKS | (0 << 13),
+			MASKS | (3 << 13),
+			MASKS | (2 << 13),
+			MASKS | (1 << 13),
+		};
+		_mm_setcsr(mxcsr_by_mode[mode]);
+	}
+	
+	// Remembers the current MXCSR value so LoadSIMDState() can put it back.
+	void SaveSIMDState()
+	{
+		const u32 current_csr = _mm_getcsr();
+		saved_sse_state = current_csr;
+	}
+	// Writes the value last stored by SaveSIMDState() back into MXCSR. If
+	// SaveSIMDState() was never called, this is the value read at static
+	// initialization.
+	void LoadSIMDState()
+	{
+		const u32 restored_csr = saved_sse_state;
+		_mm_setcsr(restored_csr);
+	}
+	// Writes the MXCSR value captured at static initialization back into the
+	// register.
+	void LoadDefaultSIMDState()
+	{
+		const u32 startup_csr = default_sse_state;
+		_mm_setcsr(startup_csr);
+	}
+}
--- a/Source/Core/Common/Src/x64Thunk.cpp
+++ b/Source/Core/Common/Src/x64Thunk.cpp
@ -18,9 +18,8 @@
 #include <map>

 #include "Common.h"
-#include "x64Emitter.h"
 #include "MemoryUtil.h"
-#include "ABI.h"
+#include "x64ABI.h"
 #include "Thunk.h"

 #define THUNK_ARENA_SIZE 1024*1024*1