Lots of FP hacking for little gain, super monkey ball is only slightly more sane (wow, it rhymed). Temporary no-speed-limit hack: Hold TAB.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@279 8ced0084-cf51-0410-be5f-012b33b47a6e
2025-09-13 06:52:58 -06:00 · 2008-08-23 09:20:36 +00:00
parent 0becaa3223
commit f82bf2ae9a
25 changed files with 1107 additions and 861 deletions
--- a/Externals/Bochs_disasm/PowerPCDisasm.cpp
+++ b/Externals/Bochs_disasm/PowerPCDisasm.cpp
@ -2075,7 +2075,7 @@ namespace PPCDisasm
 			break;

 		case 20:
-			fdabc(dp,in,"sqrte",2,0);
+			fdabc(dp,in,"rsqrte",2,0);
 			break;

 		case 24:
--- a/Externals/wxWidgets/include/wx/msw/listctrl.h
+++ b/Externals/wxWidgets/include/wx/msw/listctrl.h
@ -12,6 +12,7 @@
 #ifndef _WX_LISTCTRL_H_
 #define _WX_LISTCTRL_H_

+#include "wx/dcbuffer.h"
 #include "wx/textctrl.h"

 class WXDLLIMPEXP_FWD_CORE wxImageList;
--- a/Source/Core/Core/Core.vcproj
+++ b/Source/Core/Core/Core.vcproj
@ -50,6 +50,7 @@
 				BasicRuntimeChecks="3"
 				RuntimeLibrary="1"
 				BufferSecurityCheck="true"
+				FloatingPointModel="0"
 				UsePrecompiledHeader="2"
 				AssemblerListingLocation="$(IntDir)\"
 				WarningLevel="3"
@ -118,6 +119,7 @@
 				BasicRuntimeChecks="3"
 				RuntimeLibrary="1"
 				BufferSecurityCheck="true"
+				FloatingPointModel="0"
 				UsePrecompiledHeader="2"
 				AssemblerListingLocation="$(IntDir)\"
 				WarningLevel="3"
@ -190,7 +192,7 @@
 				RuntimeLibrary="0"
 				BufferSecurityCheck="false"
 				EnableEnhancedInstructionSet="2"
-				FloatingPointModel="2"
+				FloatingPointModel="0"
 				UsePrecompiledHeader="2"
 				AssemblerListingLocation="$(IntDir)\"
 				WarningLevel="3"
@ -265,7 +267,7 @@
 				RuntimeLibrary="0"
 				BufferSecurityCheck="false"
 				EnableEnhancedInstructionSet="0"
-				FloatingPointModel="2"
+				FloatingPointModel="0"
 				UsePrecompiledHeader="2"
 				AssemblerListingLocation="$(IntDir)\"
 				WarningLevel="3"
@ -336,6 +338,7 @@
 				PreprocessorDefinitions="NDEBUG;_LIB;LOGGING;DEBUGFAST;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0"
 				RuntimeLibrary="0"
 				BufferSecurityCheck="false"
+				FloatingPointModel="0"
 				UsePrecompiledHeader="2"
 				AssemblerListingLocation="$(IntDir)\"
 				WarningLevel="3"
@ -407,6 +410,7 @@
 				PreprocessorDefinitions="NDEBUG;_LIB;LOGGING;DEBUGFAST;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0"
 				RuntimeLibrary="0"
 				BufferSecurityCheck="false"
+				FloatingPointModel="0"
 				UsePrecompiledHeader="2"
 				AssemblerListingLocation="$(IntDir)\"
 				WarningLevel="3"
@ -835,6 +839,14 @@
 				<File
 					RelativePath=".\Src\PowerPC\Interpreter\Interpreter_FloatingPoint.cpp"
 					>
+					<FileConfiguration
+						Name="Release|Win32"
+						>
+						<Tool
+							Name="VCCLCompilerTool"
+							AssemblerOutput="4"
+						/>
+					</FileConfiguration>
 				</File>
 				<File
 					RelativePath=".\Src\PowerPC\Interpreter\Interpreter_Integer.cpp"
@ -844,6 +856,10 @@
 					RelativePath=".\Src\PowerPC\Interpreter\Interpreter_LoadStore.cpp"
 					>
 				</File>
+				<File
+					RelativePath=".\Src\PowerPC\Interpreter\Interpreter_LoadStorePaired.cpp"
+					>
+				</File>
 				<File
 					RelativePath=".\Src\PowerPC\Interpreter\Interpreter_Paired.cpp"
 					>
@ -851,6 +867,14 @@
 				<File
 					RelativePath=".\Src\PowerPC\Interpreter\Interpreter_SystemRegisters.cpp"
 					>
+					<FileConfiguration
+						Name="Release|Win32"
+						>
+						<Tool
+							Name="VCCLCompilerTool"
+							AssemblerOutput="4"
+						/>
+					</FileConfiguration>
 				</File>
 			</Filter>
 			<Filter
@ -946,46 +970,6 @@
 				</File>
 			</Filter>
 		</Filter>
-		<Filter
-			Name="Boot"
-			>
-			<File
-				RelativePath=".\Src\Boot\Boot.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\Boot.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\Boot_DOL.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\Boot_DOL.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\Boot_ELF.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\Boot_ELF.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\ElfReader.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\ElfReader.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Src\Boot\ElfTypes.h"
-				>
-			</File>
-		</Filter>
 		<Filter
 			Name="Debugger"
 			>
@ -1025,6 +1009,46 @@
 				RelativePath=".\Src\Debugger\PPCDebugInterface.h"
 				>
 			</File>
+			<Filter
+				Name="Boot"
+				>
+				<File
+					RelativePath=".\Src\Boot\Boot.cpp"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\Boot.h"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\Boot_DOL.cpp"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\Boot_DOL.h"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\Boot_ELF.cpp"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\Boot_ELF.h"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\ElfReader.cpp"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\ElfReader.h"
+					>
+				</File>
+				<File
+					RelativePath=".\Src\Boot\ElfTypes.h"
+					>
+				</File>
+			</Filter>
 		</Filter>
 		<Filter
 			Name="IPC HLE"
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@ -16,6 +16,7 @@
 // http://code.google.com/p/dolphin-emu/

 #include <math.h>
+#include <limits>

 #ifdef _WIN32
 #include <intrin.h>
@ -26,16 +27,87 @@
 #include "../../Core.h"
 #include "Interpreter.h"

-// If you wanna have fun, read:
-// 80007e08 in super monkey ball
+// SUPER MONKEY BALL IS BEING A ROYAL PAIN
+// We are missing the caller of 800070ec
+// POSSIBLE APPROACHES:
+// * Full SW FPU. Urgh.
+// * Partial SW FPU, emulate just as much as necessary for monkey ball. Feasible but a lot of work.
+// * HLE hacking. Figure out what all the evil functions really do and fake them.
+
+// Interesting places in Super Monkey Ball:
+// 80036654: fctwixz stuff
+// 80007e08:
+//	-98: Various entry points that loads various odd fp values into f1
+// 800070b0: Estimate inverse square root.
+// 800070ec: Examine f1. Reads a value out of locked cache into f2 (fixed address). Some cases causes us to call the above thing.
+//           If all goes well, jump to 70b0, which estimates the inverse square root. 
+//           Then multiply the loaded variable with the original value of f1. Result should be the square root. (1 / sqrt(x)) * x  = x / sqrt(x) = sqrt(x)
+// 8000712c: Similar, but does not do the multiply at the end, just an frspx.
+// 8000716c: Sort of similar, but has extra junk at the end.
+//
+// 
+// 800072a4 - nightmare of nightmares
 // Fun stuff used:
 // bso+
-// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly
+// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly (!!!!)
 // crclr
 // crset
 // crxor
 // fnabs
-// 
+// Super Monkey Ball reads FPRF & friends after fmadds, fmuls, frspx
+// WHY do the FR & FI flags affect it so much?
+
+void UpdateFPSCR(UReg_FPSCR fp);
+void UpdateSSEState();
+
+// Classifies `value` and stores the matching 5-bit result-class code in
+// FPSCR.FPRF (class descriptor bit on top, then the <, >, =, ? bits).
+//   0x02 / 0x12 : +zero / -zero
+//   0x05 / 0x09 : +infinity / -infinity
+//   0x04 / 0x08 : positive / negative normalized number
+//   0x14 / 0x18 : positive / negative denormalized number
+//   0x11        : NaN (reported as quiet NaN regardless of the top fraction bit)
+void UpdateFPRF(double value)
+{
+	// Pull the IEEE-754 double apart into sign, biased exponent and fraction.
+	const u64 bits = *((u64*)&value);
+	const bool negative = (bits >> 63) != 0;
+	const int biased_exp = (int)((bits >> 52) & 0x7FF);
+	const u64 fraction = bits & 0x000FFFFFFFFFFFFFULL;
+
+	// Default classification; every path below overwrites it.
+	FPSCR.FPRF = 4;
+
+	if (biased_exp == 0)
+	{
+		if (fraction == 0)
+			FPSCR.FPRF = negative ? 0x12 : 0x2;   // signed zero
+		else
+			FPSCR.FPRF = negative ? 0x18 : 0x14;  // denormalized number
+	}
+	else if (biased_exp == 0x7FF)
+	{
+		if (fraction == 0)
+			FPSCR.FPRF = negative ? 0x9 : 0x5;    // signed infinity
+		else
+			FPSCR.FPRF = 0x11;                    // NaN
+	}
+	else
+	{
+		FPSCR.FPRF = negative ? 0x8 : 0x4;        // normalized number
+	}
+}
+

 // extremely rare
 void CInterpreter::Helper_UpdateCR1(double _fValue)
@ -48,110 +120,24 @@ void CInterpreter::Helper_UpdateCR1(double _fValue)
 	if (_fValue < 0.0)
 		FPSCR.FPRF |= 8;
 	SetCRField(1, (FPSCR.Hex & 0x0000F000) >> 12);
+
+	PanicAlert("CR1");
 }

 bool CInterpreter::IsNAN(double _dValue) 
 { 
-	// not implemented
 	return _dValue != _dValue; 
 }

-void CInterpreter::faddsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fdivsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));  
-}
-
-void CInterpreter::fmaddsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = 
-		static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fmsubsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) =
-		static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fmulsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));  
-}
-
-void CInterpreter::fnmaddsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = 
-		static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fnmsubsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = 
-		static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fresx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(1.0f / rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fsqrtsx(UGeckoInstruction _inst)
-{
-	static bool bFirst = true;
-	if (bFirst)
-		PanicAlert("fsqrtsx - Instruction unimplemented");
-	bFirst = false;
-}
-
-void CInterpreter::fsubsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-//
-//--- END OF SINGLE PRECISION ---
-//
-
-void CInterpreter::fabsx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = fabs(rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
 void CInterpreter::fcmpo(UGeckoInstruction _inst)
 {
 	double fa =	rPS0(_inst.FA);
 	double fb =	rPS0(_inst.FB);
 	u32 compareResult;
-
-	if (IsNAN(fa) || IsNAN(fb)) compareResult = 1; 
-	else if(fa < fb)            compareResult = 8;	
-	else if(fa > fb)            compareResult = 4;
-	else                        compareResult = 2;
+	if (IsNAN(fa) || IsNAN(fb))  compareResult = 1;
+	else if (fa < fb)            compareResult = 8; 
+	else if (fa > fb)            compareResult = 4; 
+	else                         compareResult = 2;

 	FPSCR.FPRF = compareResult;
 	SetCRField(_inst.CRFD, compareResult);
@ -171,10 +157,10 @@ void CInterpreter::fcmpu(UGeckoInstruction _inst)
 	double fb =	rPS0(_inst.FB);

 	u32 compareResult;
-	if(IsNAN(fa) ||	IsNAN(fb))  compareResult = 1; 
-	else if(fa < fb)            compareResult =	8; 
-	else if(fa > fb)            compareResult =	4; 
-	else                        compareResult = 2;
+	if (IsNAN(fa) || IsNAN(fb))  compareResult = 1; 
+	else if (fa < fb)            compareResult = 8; 
+	else if (fa > fb)            compareResult = 4; 
+	else                         compareResult = 2;

 	FPSCR.FPRF = compareResult;
 	SetCRField(_inst.CRFD, compareResult);
@ -184,25 +170,36 @@ void CInterpreter::fcmpu(UGeckoInstruction _inst)
 		then VXSNAN <20> 1 */
 }

-
 // Apply current rounding mode
 void CInterpreter::fctiwx(UGeckoInstruction _inst)
 {
-	double b = rPS0(_inst.FB);
+	UpdateSSEState();
+	const double b = rPS0(_inst.FB);
 	u32 value;
 	if (b > (double)0x7fffffff)
+	{
 		value = 0x7fffffff;
+		FPSCR.VXCVI = 1;
+	}
 	else if (b < -(double)0x7fffffff) 
+	{
 		value = 0x80000000; 
+		FPSCR.VXCVI = 1;
+	}
 	else
-		value = (u32)(s32)_mm_cvtsd_si32(_mm_set_sd(b)); // TODO(ector): enforce chop
+	{
+		value = (u32)(s32)_mm_cvtsd_si32(_mm_set_sd(b));  // obey current rounding mode
+		double d_value = (double)value;
+		bool inexact = (d_value != b);
+//		FPSCR.FI = inexact ? 1 : 0;
+//		FPSCR.XX |= FPSCR.FI;
+//		FPSCR.FR = fabs(d_value) > fabs(b);
+	}
+
+	//TODO: FR
+	//FPRF undefined

 	riPS0(_inst.FD) = (u64)value; // zero extend
-
-	/* TODO(ector):
-	FPSCR[FR] is set if the result is incremented when rounded. 
-	FPSCR[FI] is set if the result is inexact.
-	*/
 	if (_inst.Rc) 
 		Helper_UpdateCR1(rPS0(_inst.FD));
 }
@ -215,14 +212,29 @@ largest representable int on PowerPC. */
 // Always round toward zero
 void CInterpreter::fctiwzx(UGeckoInstruction _inst)
 {
-	double b = rPS0(_inst.FB);
+	//UpdateFPSCR(FPSCR);
+	const double b = rPS0(_inst.FB);
 	u32 value;
 	if (b > (double)0x7fffffff)
+	{
 		value = 0x7fffffff;
+		FPSCR.VXCVI = 1;
+	}
 	else if (b < -(double)0x7fffffff)
+	{
 		value = 0x80000000;
+		FPSCR.VXCVI = 1;
+	}
 	else
-		value = (u32)(s32)_mm_cvttsd_si32(_mm_set_sd(b)); //TODO(ector): force round toward zero
+	{
+		value = (u32)(s32)_mm_cvttsd_si32(_mm_set_sd(b)); // truncate
+		double d_value = (double)value;
+		bool inexact = (d_value != b);
+//		FPSCR.FI = inexact ? 1 : 0;
+//		FPSCR.XX |= FPSCR.FI;
+//		FPSCR.FR = 1; //fabs(d_value) > fabs(b);
+	}
+	//FPRF undefined

 	riPS0(_inst.FD) = (u64)value;
 	if (_inst.Rc) 
@ -232,109 +244,281 @@ void CInterpreter::fctiwzx(UGeckoInstruction _inst)
 void CInterpreter::fmrx(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB);
-//	rPS1(_inst.FD) = rPS0(_inst.FD);    // TODO: Should this be here?
+	// This is a binary instruction. Does not alter FPSCR
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
+}

+void CInterpreter::fabsx(UGeckoInstruction _inst)
+{
+	rPS0(_inst.FD) = fabs(rPS0(_inst.FB));
+	// This is a binary instruction. Does not alter FPSCR
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void CInterpreter::fnabsx(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63);
-
+	// This is a binary instruction. Does not alter FPSCR
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void CInterpreter::fnegx(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) ^ (1ULL << 63);
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-// !!! warning !!!
-// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
-// PS1 is said to be undefined
-// TODO(ector): TODO(fires): does this apply to all of the below opcodes?
-void CInterpreter::frspx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void CInterpreter::faddx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void CInterpreter::fdivx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB);
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void CInterpreter::fmaddx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void CInterpreter::fmsubx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fmulx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) * rPS0(_inst.FC);
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
-}
-
-void CInterpreter::fnmaddx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void CInterpreter::fnmsubx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void CInterpreter::frsqrtex(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = 1.0 / (sqrt(rPS0(_inst.FB)));
-
+	// This is a binary instruction. Does not alter FPSCR
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void CInterpreter::fselx(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = (rPS0(_inst.FA) >= 0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB);
+	rPS0(_inst.FD) = (rPS0(_inst.FA) >= -0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB);
+	// This is a binary instruction. Does not alter FPSCR
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}

+
+// !!! warning !!!
+// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
+// PS1 is said to be undefined
+// Super Monkey Ball is using this to do wacky tricks so we need 100% correct emulation.
+// frsp: rounds the double in PS0 of register FB to single precision and writes
+// the result to both PS0 and PS1 of register FD.
+// NOTE(review): the first branch below is always taken (`true || ...`) and
+// returns, so the software rounding path, the UpdateFPRF call and the Rc/CR1
+// update after it are currently unreachable.
+void CInterpreter::frspx(UGeckoInstruction _inst)  // round to single
+{
+	if (true || FPSCR.RN != 0)
+	{
+		// Not used in Super Monkey Ball
+		// Host path: round by casting through float after calling UpdateSSEState().
+		UpdateSSEState();
+		double b = rPS0(_inst.FB);
+		double rounded = (double)(float)b;
+		// FI = the conversion changed the value. FR is forced to 1 rather than computed.
+		FPSCR.FI = b != rounded;  // changing both of these affect Super Monkey Ball behaviour greatly.
+		FPSCR.FR = 1;  // WHY? fabs(rounded) > fabs(b);
+		rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
+		return;
+		// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
+	}
+
+	// OK, let's try it in 100% software! Not yet working right.
+	// (Unreachable while the branch above is unconditional.)
+	union {
+		double d;
+		u64 i;
+	} in, out;
+	in.d = rPS0(_inst.FB);
+	out = in;
+	// Split the double into sign, biased exponent and 52-bit fraction.
+	int sign = in.i >> 63;
+	int exp = (in.i >> 52) & 0x7FF;
+	u64 mantissa = in.i & 0x000FFFFFFFFFFFFFULL;
+	// The upper 23 fraction bits are kept; the lower 29 bits are rounded away.
+	u64 mantissa_single = mantissa & 0x000FFFFFE0000000ULL;
+	u64 leftover_single = mantissa & 0x000000001FFFFFFFULL;
+
+	// OK. First make sure that we have a "normal" number.
+	if (exp >= 1 && exp <= 2046) {
+		// OK. Check for overflow. TODO
+
+		FPSCR.FI = leftover_single != 0; // Inexact
+		// Rounds up when the discarded bits are at least half an ULP.
+		// NOTE(review): an exact tie always rounds up here; round-to-nearest-even
+		// would differ on ties - confirm the intended mode.
+		if (leftover_single >= 0x10000000ULL) {
+			//PanicAlert("rounding up");
+			FPSCR.FR = 1;
+			mantissa_single += 0x20000000;
+			// A carry out of the fraction field means the exponent must grow by one.
+			if (mantissa_single & 0x0010000000000000) {
+				// PanicAlert("renormalizing");
+				mantissa_single >>= 1;
+				exp += 1;
+				// if (exp > 2046) { OVERFLOW }
+			}
+		}
+		out.i = ((u64)sign << 63) | ((u64)exp << 52) | mantissa_single;
+	} else {
+		if (!exp && !mantissa) {
+			// Positive or negative Zero. All is well.
+			FPSCR.FI = 0;
+			FPSCR.FR = 0;
+		} else if (exp == 0 && mantissa) {
+			// Denormalized number.
+			PanicAlert("denorm");
+		} else if (exp == 2047 && !mantissa) {
+			// Infinite.
+			//PanicAlert("infinite");
+			FPSCR.FI = 1;
+			FPSCR.FR = 1;
+//			FPSCR.OX = 1;
+		} else {
+			// Remaining case: exponent all ones with a non-zero fraction (NaN); passed through unchanged.
+			//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
+		}
+	}
+	UpdateFPRF(out.d);
+	FPSCR.FR = 1; // SUPER MONKEY BALL HACK
+	rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
+
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fmul: PS0(FD) = PS0(FA) * PS0(FC) in double precision.
+// FI and FR are written with fixed values (0 and 1), not derived from the result.
+void CInterpreter::fmulx(UGeckoInstruction _inst)
+{
+	const double lhs = rPS0(_inst.FA);
+	const double rhs = rPS0(_inst.FC);
+	rPS0(_inst.FD) = lhs * rhs;
+
+	FPSCR.FI = 0;
+	FPSCR.FR = 1;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fmuls: multiplies PS0(FA) by PS0(FC), rounds the product to single precision
+// and stores it in both PS0 and PS1 of FD. FI reports whether the rounding
+// changed the value; FPRF is refreshed from the stored result.
+// NOTE(review): FR is filled with a random bit - an experiment hack, which makes
+// this instruction non-deterministic.
+void CInterpreter::fmulsx(UGeckoInstruction _inst)
+{
+	const double product = rPS0(_inst.FA) * rPS0(_inst.FC);
+	const double rounded = static_cast<float>(product);
+
+	rPS1(_inst.FD) = rounded;
+	rPS0(_inst.FD) = rounded;
+
+	FPSCR.FI = product != rounded;
+	FPSCR.FR = rand()&1;
+	UpdateFPRF(rounded);
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fmadd: PS0(FD) = (PS0(FA) * PS0(FC)) + PS0(FB) in double precision.
+// FI and FR are both cleared unconditionally.
+void CInterpreter::fmaddx(UGeckoInstruction _inst)
+{
+	const double a = rPS0(_inst.FA);
+	const double b = rPS0(_inst.FB);
+	const double c = rPS0(_inst.FC);
+	rPS0(_inst.FD) = (a * c) + b;
+
+	FPSCR.FI = 0;
+	FPSCR.FR = 0;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fmadds: computes (PS0(FA) * PS0(FC)) + PS0(FB), rounds it to single precision
+// and stores it in both PS0 and PS1 of FD. FI reports whether the rounding
+// changed the value, FR is cleared, and FPRF is refreshed from the result.
+void CInterpreter::fmaddsx(UGeckoInstruction _inst)
+{
+	const double exact = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
+	const double rounded = static_cast<float>(exact);
+
+	rPS1(_inst.FD) = rounded;
+	rPS0(_inst.FD) = rounded;
+
+	FPSCR.FI = exact != rounded;
+	FPSCR.FR = 0;
+	UpdateFPRF(rounded);
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fadd: PS0(FD) = PS0(FA) + PS0(FB) in double precision.
+// FPSCR FI/FR are intentionally left untouched.
+void CInterpreter::faddx(UGeckoInstruction _inst)
+{
+	const double lhs = rPS0(_inst.FA);
+	const double rhs = rPS0(_inst.FB);
+	rPS0(_inst.FD) = lhs + rhs;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fadds: adds PS0(FA) and PS0(FB), rounds the sum to single precision and
+// stores it in both PS0 and PS1 of FD. FPSCR FI/FR are left untouched.
+void CInterpreter::faddsx(UGeckoInstruction _inst)
+{
+	const double sum = rPS0(_inst.FA) + rPS0(_inst.FB);
+	const double rounded = static_cast<float>(sum);
+
+	rPS1(_inst.FD) = rounded;
+	rPS0(_inst.FD) = rounded;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fdiv: PS0(FD) = PS0(FA) / PS0(FB) in double precision.
+// Sets FPSCR.ZX when the divisor is +0 or -0. FI/FR are left untouched.
+void CInterpreter::fdivx(UGeckoInstruction _inst)
+{
+	// Latch the operands before writing FD: the destination may be the same
+	// register as FB, and the zero-divide check must look at the divisor,
+	// not at the freshly written quotient.
+	const double dividend = rPS0(_inst.FA);
+	const double divisor = rPS0(_inst.FB);
+
+	rPS0(_inst.FD) = dividend / divisor;
+
+	// NOTE(review): ZX is raised for any zero divisor, including 0/0 - confirm
+	// against the PowerPC exception rules whether that case should differ.
+	if (divisor == 0.0) {
+		FPSCR.ZX = 1;
+	}
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fdivs: divides PS0(FA) by PS0(FB), rounds the quotient to single precision
+// and stores it in both PS0 and PS1 of FD.
+// Sets FPSCR.ZX when the divisor is +0 or -0. FI/FR are left untouched.
+void CInterpreter::fdivsx(UGeckoInstruction _inst)
+{
+	// Latch the operands before writing FD: the destination may be the same
+	// register as FB, and the zero-divide check must look at the divisor,
+	// not at the freshly written quotient.
+	const double dividend = rPS0(_inst.FA);
+	const double divisor = rPS0(_inst.FB);
+
+	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(dividend / divisor);
+
+	// NOTE(review): ZX is raised for any zero divisor, including 0/0 - confirm
+	// against the PowerPC exception rules whether that case should differ.
+	if (divisor == 0.0) {
+		FPSCR.ZX = 1;
+	}
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fres: stores the reciprocal of PS0(FB), rounded to single precision, in both
+// PS0 and PS1 of FD. It is computed with a full division, not an estimate table.
+// Sets FPSCR.ZX when the operand is +0 or -0. FI/FR are left untouched.
+void CInterpreter::fresx(UGeckoInstruction _inst)
+{
+	// Latch the operand before writing FD: the destination may be the same
+	// register as FB, and the zero check must look at the operand, not at
+	// the freshly written reciprocal.
+	const double operand = rPS0(_inst.FB);
+
+	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(1.0f / operand);
+
+	if (operand == 0.0) {
+		FPSCR.ZX = 1;
+	}
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fmsub: PS0(FD) = (PS0(FA) * PS0(FC)) - PS0(FB) in double precision.
+// FPSCR FI/FR are left untouched.
+void CInterpreter::fmsubx(UGeckoInstruction _inst)
+{
+	const double a = rPS0(_inst.FA);
+	const double b = rPS0(_inst.FB);
+	const double c = rPS0(_inst.FC);
+	rPS0(_inst.FD) = (a * c) - b;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+// fmsubs: computes (PS0(FA) * PS0(FC)) - PS0(FB), rounds it to single precision
+// and stores it in both PS0 and PS1 of FD. FPSCR FI/FR are left untouched.
+void CInterpreter::fmsubsx(UGeckoInstruction _inst)
+{
+	const double exact = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
+	const double rounded = static_cast<float>(exact);
+
+	rPS1(_inst.FD) = rounded;
+	rPS0(_inst.FD) = rounded;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fnmadd: PS0(FD) = -((PS0(FA) * PS0(FC)) + PS0(FB)) in double precision.
+// FPSCR FI/FR are left untouched.
+void CInterpreter::fnmaddx(UGeckoInstruction _inst)
+{
+	const double a = rPS0(_inst.FA);
+	const double b = rPS0(_inst.FB);
+	const double c = rPS0(_inst.FC);
+	rPS0(_inst.FD) = -((a * c) + b);
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fnmadds: computes -((PS0(FA) * PS0(FC)) + PS0(FB)), rounds it to single
+// precision and stores it in both PS0 and PS1 of FD. FI/FR are left untouched.
+void CInterpreter::fnmaddsx(UGeckoInstruction _inst)
+{
+	const double exact = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
+	const double rounded = static_cast<float>(exact);
+
+	rPS1(_inst.FD) = rounded;
+	rPS0(_inst.FD) = rounded;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fnmsub: PS0(FD) = -((PS0(FA) * PS0(FC)) - PS0(FB)) in double precision.
+// FPSCR FI/FR are left untouched.
+void CInterpreter::fnmsubx(UGeckoInstruction _inst)
+{
+	const double a = rPS0(_inst.FA);
+	const double b = rPS0(_inst.FB);
+	const double c = rPS0(_inst.FC);
+	rPS0(_inst.FD) = -((a * c) - b);
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fnmsubs: computes -((PS0(FA) * PS0(FC)) - PS0(FB)), rounds it to single
+// precision and stores it in both PS0 and PS1 of FD. FI/FR are left untouched.
+void CInterpreter::fnmsubsx(UGeckoInstruction _inst)
+{
+	const double exact = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
+	const double rounded = static_cast<float>(exact);
+
+	rPS1(_inst.FD) = rounded;
+	rPS0(_inst.FD) = rounded;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// fsub: PS0(FD) = PS0(FA) - PS0(FB) in double precision.
+// FPSCR FI/FR are left untouched.
+void CInterpreter::fsubx(UGeckoInstruction _inst)
+{
+	const double lhs = rPS0(_inst.FA);
+	const double rhs = rPS0(_inst.FB);
+	rPS0(_inst.FD) = lhs - rhs;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+// fsubs: subtracts PS0(FB) from PS0(FA), rounds the difference to single
+// precision and stores it in both PS0 and PS1 of FD. FI/FR are left untouched.
+void CInterpreter::fsubsx(UGeckoInstruction _inst)
+{
+	const double difference = rPS0(_inst.FA) - rPS0(_inst.FB);
+	const double rounded = static_cast<float>(difference);
+
+	rPS1(_inst.FD) = rounded;
+	rPS0(_inst.FD) = rounded;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+
+// frsqrte: PS0(FD) = 1 / sqrt(PS0(FB)).
+// Both the square root and the division are carried out in single precision
+// (sqrtf and a float literal), so the stored double has only float accuracy.
+// PS1 and the FPSCR FI/FR bits are left untouched.
+void CInterpreter::frsqrtex(UGeckoInstruction _inst)
+{
+	const float root = sqrtf(rPS0(_inst.FB));
+	rPS0(_inst.FD) = 1.0f / root;
+
+	if (_inst.Rc)
+		Helper_UpdateCR1(rPS0(_inst.FD));
+}

 void CInterpreter::fsqrtx(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD)  = sqrt(rPS0(_inst.FB));
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void CInterpreter::fsubx(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
+	rPS0(_inst.FD) = sqrt(rPS0(_inst.FB));
+//	FPSCR.FI = 0;
+//	FPSCR.FR = 0;

 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@ -72,13 +72,13 @@ void CInterpreter::lfdu(UGeckoInstruction _inst)
 void CInterpreter::lfdux(UGeckoInstruction _inst)
 {
 	u32 uAddress = Helper_Get_EA_UX(_inst);
-	riPS0(_inst.FS) = Memory::Read_U64(uAddress);
+	riPS0(_inst.FD) = Memory::Read_U64(uAddress);
 	m_GPR[_inst.RA] = uAddress;
 }

 void CInterpreter::lfdx(UGeckoInstruction _inst)
 {
-	riPS0(_inst.FS) = Memory::Read_U64(Helper_Get_EA_X(_inst));
+	riPS0(_inst.FD) = Memory::Read_U64(Helper_Get_EA_X(_inst));
 }

 void CInterpreter::lfs(UGeckoInstruction _inst)
@ -149,7 +149,7 @@ void CInterpreter::lmw(UGeckoInstruction _inst)
 			return;
 		}

-		m_GPR[iReg] =  TempReg;
+		m_GPR[iReg] = TempReg;
 	}
 }

--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
@ -0,0 +1,337 @@
+// Copyright (C) 2003-2008 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#include <math.h>
+#include "Interpreter.h"
+#include "../../HW/Memmap.h"
+
+// Dequantize table: multiplier applied by Helper_Dequantize, indexed directly
+// by the scale value (64 entries).
+// Entries 0..31 hold 2^-n; entries 32..63 hold 2^32 down to 2^1 (presumably the
+// scale field read as a negative two's-complement number - confirm against the
+// Gekko documentation).
+// The 2^31 entries use an unsigned literal: with a plain int, `1 << 31` lands on
+// the sign bit and the stored factor comes out negative.
+const float m_dequantizeTable[] =
+{
+	1.0 / (1 <<  0),	1.0 / (1 <<  1),	1.0 / (1 <<  2),	1.0 / (1 <<  3),
+	1.0 / (1 <<  4),	1.0 / (1 <<  5),	1.0 / (1 <<  6),	1.0 / (1 <<  7),
+	1.0 / (1 <<  8),	1.0 / (1 <<  9),	1.0 / (1 << 10),	1.0 / (1 << 11),
+	1.0 / (1 << 12),	1.0 / (1 << 13),	1.0 / (1 << 14),	1.0 / (1 << 15),
+	1.0 / (1 << 16),	1.0 / (1 << 17),	1.0 / (1 << 18),	1.0 / (1 << 19),
+	1.0 / (1 << 20),	1.0 / (1 << 21),	1.0 / (1 << 22),	1.0 / (1 << 23),
+	1.0 / (1 << 24),	1.0 / (1 << 25),	1.0 / (1 << 26),	1.0 / (1 << 27),
+	1.0 / (1 << 28),	1.0 / (1 << 29),	1.0 / (1 << 30),	1.0 / (1U << 31),
+	(1ULL << 32),	(1U << 31),	(1 << 30),	(1 << 29),
+	(1 << 28),	(1 << 27),	(1 << 26),	(1 << 25),
+	(1 << 24),	(1 << 23),	(1 << 22),	(1 << 21),
+	(1 << 20),	(1 << 19),	(1 << 18),	(1 << 17),
+	(1 << 16),	(1 << 15),	(1 << 14),	(1 << 13),
+	(1 << 12),	(1 << 11),	(1 << 10),	(1 <<  9),
+	(1 <<  8),	(1 <<  7),	(1 <<  6),	(1 <<  5),
+	(1 <<  4),	(1 <<  3),	(1 <<  2),	(1 <<  1),
+}; 
+
+// Quantize table: multiplier applied by Helper_Quantize before clamping,
+// indexed directly by the scale value (64 entries).
+// Entries 0..31 hold 2^n; entries 32..63 hold 2^-32 up to 2^-1 (presumably the
+// scale field read as a negative two's-complement number - confirm against the
+// Gekko documentation).
+// The 2^31 entries use an unsigned literal: with a plain int, `1 << 31` lands on
+// the sign bit and the stored factor comes out negative.
+const float m_quantizeTable[] =
+{
+	(1 <<  0),	(1 <<  1),	(1 <<  2),	(1 <<  3),
+	(1 <<  4),	(1 <<  5),	(1 <<  6),	(1 <<  7),
+	(1 <<  8),	(1 <<  9),	(1 << 10),	(1 << 11),
+	(1 << 12),	(1 << 13),	(1 << 14),	(1 << 15),
+	(1 << 16),	(1 << 17),	(1 << 18),	(1 << 19),
+	(1 << 20),	(1 << 21),	(1 << 22),	(1 << 23),
+	(1 << 24),	(1 << 25),	(1 << 26),	(1 << 27),
+	(1 << 28),	(1 << 29),	(1 << 30),	(1U << 31),
+	1.0 / (1ULL << 32),	1.0 / (1U << 31),	1.0 / (1 << 30),	1.0 / (1 << 29),
+	1.0 / (1 << 28),	1.0 / (1 << 27),	1.0 / (1 << 26),	1.0 / (1 << 25),
+	1.0 / (1 << 24),	1.0 / (1 << 23),	1.0 / (1 << 22),	1.0 / (1 << 21),
+	1.0 / (1 << 20),	1.0 / (1 << 19),	1.0 / (1 << 18),	1.0 / (1 << 17),
+	1.0 / (1 << 16),	1.0 / (1 << 15),	1.0 / (1 << 14),	1.0 / (1 << 13),
+	1.0 / (1 << 12),	1.0 / (1 << 11),	1.0 / (1 << 10),	1.0 / (1 <<  9),
+	1.0 / (1 <<  8),	1.0 / (1 <<  7),	1.0 / (1 <<  6),	1.0 / (1 <<  5),
+	1.0 / (1 <<  4),	1.0 / (1 <<  3),	1.0 / (1 <<  2),	1.0 / (1 <<  1),
+}; 
+
+// Limits `a` to the closed range [bottom, top].
+// The upper bound is tested first; a value that compares neither above `top`
+// nor below `bottom` is returned unchanged.
+template <class T>
+inline T CLAMP(T a, T bottom, T top) {
+	return (a > top) ? top
+	     : (a < bottom) ? bottom
+	     : a;
+}
+
+// Writes one paired-single element to emulated memory in the requested format.
+//   _Addr         : destination address
+//   _fValue       : value to store
+//   _quantizeType : storage format (raw float, or 8/16-bit signed/unsigned integer)
+//   _uScale       : index into m_quantizeTable; ignored for QUANTIZE_FLOAT
+// Integer formats are scaled, clamped to the target type's range and then
+// converted with a C cast. An unknown format writes nothing and trips a debug
+// assert.
+void CInterpreter::Helper_Quantize(const u32 _Addr, const float _fValue, 
+							  const EQuantizeType _quantizeType, const unsigned int _uScale)
+{
+	switch(_quantizeType) 
+	{
+	case QUANTIZE_FLOAT:
+		// Store the raw IEEE-754 bit pattern, no scaling.
+		Memory::Write_U32(*(u32*)&_fValue,_Addr);
+		break;
+
+	// used for THP player
+	case QUANTIZE_U8:
+		{
+			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f);
+			Memory::Write_U8((u8)fResult, _Addr); 
+		}
+		break;
+
+	case QUANTIZE_U16:
+		{
+			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f);
+			Memory::Write_U16((u16)fResult, _Addr); 
+		}
+		break;
+
+	case QUANTIZE_S8:
+		{
+			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f);
+			// Convert through the signed type first so negative values keep their bit pattern.
+			Memory::Write_U8((u8)(s8)fResult, _Addr); 
+		}
+		break;
+
+	case QUANTIZE_S16:
+		{
+			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f);
+			// Convert through the signed type first so negative values keep their bit pattern.
+			Memory::Write_U16((u16)(s16)fResult, _Addr); 
+		}
+		break;
+
+	default:
+		// This is the store path, so the diagnostic names quantize/write.
+		_dbg_assert_msg_(GEKKO,0,"PS quantize","Unknown type to write");
+		break;
+	}
+}
+
+// Reads one paired-single element from emulated memory and converts it to float.
+//   _Addr         : source address
+//   _quantizeType : storage format (raw float, or 8/16-bit signed/unsigned integer)
+//   _uScale       : index into m_dequantizeTable; ignored for QUANTIZE_FLOAT
+// Returns the raw float for QUANTIZE_FLOAT, the scaled integer for the integer
+// formats, and 0 (after a debug assert) for an unknown format.
+float CInterpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, 
+								const unsigned int _uScale)
+{
+	// Raw floats bypass the scale table entirely.
+	if (_quantizeType == QUANTIZE_FLOAT)
+	{
+		u32 rawBits = Memory::Read_U32(_Addr);
+		return *(float*)&rawBits;
+	}
+
+	// Fetch the integer in its stored width and signedness.
+	float fRaw;
+	switch(_quantizeType)
+	{
+	case QUANTIZE_U8:
+		fRaw = static_cast<float>(Memory::Read_U8(_Addr));
+		break;
+
+	case QUANTIZE_U16:
+		fRaw = static_cast<float>(Memory::Read_U16(_Addr));
+		break;
+
+	case QUANTIZE_S8:
+		fRaw = static_cast<float>((s8)Memory::Read_U8(_Addr));
+		break;
+
+		// used for THP player
+	case QUANTIZE_S16:
+		fRaw = static_cast<float>((s16)Memory::Read_U16(_Addr));
+		break;
+
+	default:
+		_dbg_assert_msg_(GEKKO,0,"PS dequantize","Unknown type to read");
+		return 0;
+	}
+
+	const float fResult = fRaw * m_dequantizeTable[_uScale];
+	return fResult;
+}
+
+// psq_l: quantized paired-single load, displacement form.
+// The format and scale come from the load fields of the GQR selected by the I
+// field. PS0 is always loaded from EA; PS1 is loaded from the next element when
+// W == 0 and set to 1.0 otherwise. RA == 0 means "no base register".
+void CInterpreter::psq_l(UGeckoInstruction _inst) 
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
+	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
+	const unsigned int ldScale = gqr.LD_SCALE;
+
+	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
+	const u32 EA = base + _inst.SIMM_12;
+
+	// Distance in bytes between the two elements in memory.
+	int stride;
+	if ((ldType == QUANTIZE_U8) || (ldType == QUANTIZE_S8))
+		stride = 0x1;
+	else if ((ldType == QUANTIZE_U16) || (ldType == QUANTIZE_S16))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	rPS0(_inst.RS) = Helper_Dequantize(EA, ldType, ldScale);
+	if (_inst.W == 0)
+		rPS1(_inst.RS) = Helper_Dequantize(EA+stride, ldType, ldScale);
+	else
+		rPS1(_inst.RS) = 1.0f;
+}
+
+// psq_lu: quantized paired-single load, displacement form with update.
+// Same as psq_l except that RA is always used as the base and receives the
+// effective address once the load is done.
+void CInterpreter::psq_lu(UGeckoInstruction _inst)
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
+	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
+	const unsigned int ldScale = gqr.LD_SCALE;
+	const u32 EA = m_GPR[_inst.RA] + _inst.SIMM_12;
+
+	// Distance in bytes between the two elements: 1 for types 4/6, 2 for 5/7, else 4.
+	int stride;
+	if ((ldType == 4) || (ldType == 6))
+		stride = 0x1;
+	else if ((ldType == 5) || (ldType == 7))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
+	if (_inst.W == 0)
+		rPS1(_inst.RS) = Helper_Dequantize( EA+stride, ldType, ldScale );
+	else
+		rPS1(_inst.RS) = 1.0f;
+
+	m_GPR[_inst.RA] = EA;
+}
+
+// psq_st: quantized paired-single store, displacement form.
+// The format and scale come from the store fields of the GQR selected by the I
+// field. PS0 is always written to EA; PS1 is written to the next element only
+// when W == 0. RA == 0 means "no base register".
+void CInterpreter::psq_st(UGeckoInstruction _inst)
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
+	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
+	const unsigned int stScale = gqr.ST_SCALE;
+
+	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
+	const u32 EA = base + _inst.SIMM_12;
+
+	// Distance in bytes between the two elements: 1 for types 4/6, 2 for 5/7, else 4.
+	int stride;
+	if ((stType == 4) || (stType == 6))
+		stride = 0x1;
+	else if ((stType == 5) || (stType == 7))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	Helper_Quantize( EA, (float)rPS0(_inst.RS), stType, stScale );
+	if (_inst.W == 0)
+		Helper_Quantize( EA+stride, (float)rPS1(_inst.RS), stType, stScale );
+}
+
+// psq_stu: quantized paired-single store, displacement form with update.
+// Same as psq_st except that RA is always used as the base and receives the
+// effective address once the store is done.
+void CInterpreter::psq_stu(UGeckoInstruction _inst)
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
+	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
+	const unsigned int stScale = gqr.ST_SCALE;
+	const u32 EA = m_GPR[_inst.RA] + _inst.SIMM_12;
+
+	// Distance in bytes between the two elements: 1 for types 4/6, 2 for 5/7, else 4.
+	int stride;
+	if ((stType == 4) || (stType == 6))
+		stride = 0x1;
+	else if ((stType == 5) || (stType == 7))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
+	if (_inst.W == 0)
+		Helper_Quantize(EA+stride, (float)rPS1(_inst.RS), stType, stScale);
+
+	m_GPR[_inst.RA] = EA;
+}
+
+// psq_lx: quantized paired-single load, indexed form.
+// EA = RB plus RA (RA == 0 contributes nothing). The format and scale come from
+// the load fields of the GQR selected by the Ix field. PS0 is always loaded;
+// PS1 is loaded from the next element when Wx == 0 and set to 1.0 otherwise.
+void CInterpreter::psq_lx(UGeckoInstruction _inst)
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
+	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
+	const unsigned int ldScale = gqr.LD_SCALE;
+
+	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
+	const u32 EA = base + m_GPR[_inst.RB];
+
+	// Distance in bytes between the two elements: 1 for types 4/6, 2 for 5/7, else 4.
+	int stride;
+	if ((ldType == 4) || (ldType == 6))
+		stride = 0x1;
+	else if ((ldType == 5) || (ldType == 7))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
+	if (_inst.Wx == 0)
+		rPS1(_inst.RS) = Helper_Dequantize( EA+stride, ldType, ldScale );
+	else
+		rPS1(_inst.RS) = 1.0f;
+}
+
+// psq_stx: quantized paired-single store, indexed form.
+// EA = RB plus RA (RA == 0 contributes nothing). The format and scale come from
+// the store fields of the GQR selected by the Ix field. PS0 is always written;
+// PS1 is written to the next element only when Wx == 0.
+void CInterpreter::psq_stx(UGeckoInstruction _inst)
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
+	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
+	const unsigned int stScale = gqr.ST_SCALE;
+
+	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
+	const u32 EA = base + m_GPR[_inst.RB];
+
+	// Distance in bytes between the two elements: 1 for types 4/6, 2 for 5/7, else 4.
+	int stride;
+	if ((stType == 4) || (stType == 6))
+		stride = 0x1;
+	else if ((stType == 5) || (stType == 7))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
+	if (_inst.Wx == 0)
+		Helper_Quantize(EA+stride, (float)rPS1(_inst.RS), stType, stScale);
+}
+
+// psq_lux: quantized paired-single load, indexed form with update.
+// EA = RA + RB (RA is always used). Same load behaviour as psq_lx; RA receives
+// the effective address once the load is done.
+void CInterpreter::psq_lux(UGeckoInstruction _inst)
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
+	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
+	const unsigned int ldScale = gqr.LD_SCALE;
+	const u32 EA = m_GPR[_inst.RA] + m_GPR[_inst.RB];
+
+	// Distance in bytes between the two elements: 1 for types 4/6, 2 for 5/7, else 4.
+	int stride;
+	if ((ldType == 4) || (ldType == 6))
+		stride = 0x1;
+	else if ((ldType == 5) || (ldType == 7))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
+	if (_inst.Wx == 0)
+		rPS1(_inst.RS) = Helper_Dequantize( EA+stride, ldType, ldScale );
+	else
+		rPS1(_inst.RS) = 1.0f;
+
+	m_GPR[_inst.RA] = EA;
+}
+
+// psq_stux: quantized paired-single store, indexed form with update.
+// EA = RA + RB (RA is always used). Same store behaviour as psq_stx; RA
+// receives the effective address once the store is done.
+void CInterpreter::psq_stux(UGeckoInstruction _inst)
+{
+	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
+	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
+	const unsigned int stScale = gqr.ST_SCALE;
+	const u32 EA = m_GPR[_inst.RA] + m_GPR[_inst.RB];
+
+	// Distance in bytes between the two elements: 1 for types 4/6, 2 for 5/7, else 4.
+	int stride;
+	if ((stType == 4) || (stType == 6))
+		stride = 0x1;
+	else if ((stType == 5) || (stType == 7))
+		stride = 0x2;
+	else
+		stride = 4;
+
+	Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
+	if (_inst.Wx == 0)
+		Helper_Quantize(EA+stride, (float)rPS1(_inst.RS), stType, stScale);
+
+	m_GPR[_inst.RA] = EA;
+}
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp
@ -19,482 +19,11 @@
 #include "Interpreter.h"
 #include "../../HW/Memmap.h"

-// dequantize table
-const float m_dequantizeTable[] =
-{
-	1.0 / (1 <<  0),
-	1.0 / (1 <<  1),
-	1.0 / (1 <<  2),
-	1.0 / (1 <<  3),
-	1.0 / (1 <<  4),
-	1.0 / (1 <<  5),
-	1.0 / (1 <<  6),
-	1.0 / (1 <<  7),
-	1.0 / (1 <<  8),
-	1.0 / (1 <<  9),
-	1.0 / (1 << 10),
-	1.0 / (1 << 11),
-	1.0 / (1 << 12),
-	1.0 / (1 << 13),
-	1.0 / (1 << 14),
-	1.0 / (1 << 15),
-	1.0 / (1 << 16),
-	1.0 / (1 << 17),
-	1.0 / (1 << 18),
-	1.0 / (1 << 19),
-	1.0 / (1 << 20),
-	1.0 / (1 << 21),
-	1.0 / (1 << 22),
-	1.0 / (1 << 23),
-	1.0 / (1 << 24),
-	1.0 / (1 << 25),
-	1.0 / (1 << 26),
-	1.0 / (1 << 27),
-	1.0 / (1 << 28),
-	1.0 / (1 << 29),
-	1.0 / (1 << 30),
-	1.0 / (1 << 31),
-	(1ULL << 32),
-	(1 << 31),
-	(1 << 30),
-	(1 << 29),
-	(1 << 28),
-	(1 << 27),
-	(1 << 26),
-	(1 << 25),
-	(1 << 24),
-	(1 << 23),
-	(1 << 22),
-	(1 << 21),
-	(1 << 20),
-	(1 << 19),
-	(1 << 18),
-	(1 << 17),
-	(1 << 16),
-	(1 << 15),
-	(1 << 14),
-	(1 << 13),
-	(1 << 12),
-	(1 << 11),
-	(1 << 10),
-	(1 <<  9),
-	(1 <<  8),
-	(1 <<  7),
-	(1 <<  6),
-	(1 <<  5),
-	(1 <<  4),
-	(1 <<  3),
-	(1 <<  2),
-	(1 <<  1),
-}; 
-
-// quantize table
-const float m_quantizeTable[] =
-{
-	(1 <<  0),
-	(1 <<  1),
-	(1 <<  2),
-	(1 <<  3),
-	(1 <<  4),
-	(1 <<  5),
-	(1 <<  6),
-	(1 <<  7),
-	(1 <<  8),
-	(1 <<  9),
-	(1 << 10),
-	(1 << 11),
-	(1 << 12),
-	(1 << 13),
-	(1 << 14),
-	(1 << 15),
-	(1 << 16),
-	(1 << 17),
-	(1 << 18),
-	(1 << 19),
-	(1 << 20),
-	(1 << 21),
-	(1 << 22),
-	(1 << 23),
-	(1 << 24),
-	(1 << 25),
-	(1 << 26),
-	(1 << 27),
-	(1 << 28),
-	(1 << 29),
-	(1 << 30),
-	(1 << 31),
-	1.0 / (1ULL << 32),
-	1.0 / (1 << 31),
-	1.0 / (1 << 30),
-	1.0 / (1 << 29),
-	1.0 / (1 << 28),
-	1.0 / (1 << 27),
-	1.0 / (1 << 26),
-	1.0 / (1 << 25),
-	1.0 / (1 << 24),
-	1.0 / (1 << 23),
-	1.0 / (1 << 22),
-	1.0 / (1 << 21),
-	1.0 / (1 << 20),
-	1.0 / (1 << 19),
-	1.0 / (1 << 18),
-	1.0 / (1 << 17),
-	1.0 / (1 << 16),
-	1.0 / (1 << 15),
-	1.0 / (1 << 14),
-	1.0 / (1 << 13),
-	1.0 / (1 << 12),
-	1.0 / (1 << 11),
-	1.0 / (1 << 10),
-	1.0 / (1 <<  9),
-	1.0 / (1 <<  8),
-	1.0 / (1 <<  7),
-	1.0 / (1 <<  6),
-	1.0 / (1 <<  5),
-	1.0 / (1 <<  4),
-	1.0 / (1 <<  3),
-	1.0 / (1 <<  2),
-	1.0 / (1 <<  1),
-}; 
-
-template <class T>
-inline T CLAMP(T a, T bottom, T top) {
-	if (a > top) return top;
-	if (a < bottom) return bottom;
-	return a;
-}
-void CInterpreter::Helper_Quantize(const u32 _Addr, const float _fValue, 
-							  const EQuantizeType _quantizeType, const unsigned int _uScale)
-{
-	switch(_quantizeType) 
-	{
-	case QUANTIZE_FLOAT:
-		Memory::Write_U32(*(u32*)&_fValue,_Addr);
-		break;
-
-	// used for THP player
-	case QUANTIZE_U8:
-		{
-			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f);
-			Memory::Write_U8((u8)fResult, _Addr); 
-		}
-		break;
-
-	case QUANTIZE_U16:
-		{
-			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f);
-			Memory::Write_U16((u16)fResult, _Addr); 
-		}
-		break;
-
-	case QUANTIZE_S8:
-		{
-			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f);
-			Memory::Write_U8((u8)(s8)fResult, _Addr); 
-		}
-		break;
-
-	case QUANTIZE_S16:
-		{
-			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f);
-			Memory::Write_U16((u16)(s16)fResult, _Addr); 
-		}
-		break;
-
-	default:
-		_dbg_assert_msg_(GEKKO,0,"PS dequantize","Unknown type to read");
-		break;
-	}
-}
-
-float CInterpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, 
-								const unsigned int _uScale)
-{
-	// dequantize the value
-	float fResult;
-	switch(_quantizeType)
-	{
-	case QUANTIZE_FLOAT:
-		{
-			u32 dwValue = Memory::Read_U32(_Addr);
-			fResult = *(float*)&dwValue;
-		}
-		break;
-
-	case QUANTIZE_U8:
-		fResult = static_cast<float>(Memory::Read_U8(_Addr)) * m_dequantizeTable[_uScale]; 
-		break;
-
-	case QUANTIZE_U16:
-		fResult = static_cast<float>(Memory::Read_U16(_Addr)) * m_dequantizeTable[_uScale]; 
-		break;
-
-	case QUANTIZE_S8:
-		fResult = static_cast<float>((s8)Memory::Read_U8(_Addr)) * m_dequantizeTable[_uScale]; 
-		break;
-
-		// used for THP player
-	case QUANTIZE_S16:
-		fResult = static_cast<float>((s16)Memory::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
-		break;
-
-	default:
-		_dbg_assert_msg_(GEKKO,0,"PS dequantize","Unknown type to read");
-		fResult = 0;
-		break;
-	}
-
-	return fResult;
-}
-
-void CInterpreter::psq_l(UGeckoInstruction _inst) 
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
-	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
-	const unsigned int ldScale = gqr.LD_SCALE;
-	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
-
-	int c = 4;
-	if ((ldType == QUANTIZE_U8)  || (ldType == QUANTIZE_S8))  c = 0x1;
-	if ((ldType == QUANTIZE_U16) || (ldType == QUANTIZE_S16)) c = 0x2;
-
-	if (_inst.W == 0)
-	{
-		rPS0(_inst.RS) = Helper_Dequantize(EA,   ldType, ldScale);
-		rPS1(_inst.RS) = Helper_Dequantize(EA+c, ldType, ldScale);
-	}
-	else
-	{
-		rPS0(_inst.RS) = Helper_Dequantize(EA,   ldType, ldScale);
-		rPS1(_inst.RS) = 1.0f;
-	}
-}
-
-void CInterpreter::psq_lu(UGeckoInstruction _inst)
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
-	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
-	const unsigned int ldScale = gqr.LD_SCALE;
-	const u32 EA = m_GPR[_inst.RA] + _inst.SIMM_12;
-
-	int c = 4;
-	if ((ldType == 4) || (ldType == 6)) c = 0x1;
-	if ((ldType == 5) || (ldType == 7)) c = 0x2;
-
-	if (_inst.W == 0)
-	{
-		rPS0(_inst.RS) = Helper_Dequantize( EA,   ldType, ldScale );
-		rPS1(_inst.RS) = Helper_Dequantize( EA+c, ldType, ldScale );
-	}
-	else
-	{
-		rPS0(_inst.RS) = Helper_Dequantize( EA,   ldType, ldScale );
-		rPS1(_inst.RS) = 1.0f;
-	}
-	m_GPR[_inst.RA] = EA;
-}
-
-void CInterpreter::psq_st(UGeckoInstruction _inst)
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
-	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
-	const unsigned int stScale = gqr.ST_SCALE;
-	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
-
-	int c = 4;
-	if ((stType == 4) || (stType == 6)) c = 0x1;
-	if ((stType == 5) || (stType == 7)) c = 0x2;
-
-	if (_inst.W == 0)
-	{
-		Helper_Quantize( EA,   (float)rPS0(_inst.RS), stType, stScale );
-		Helper_Quantize( EA+c, (float)rPS1(_inst.RS), stType, stScale );
-	}
-	else
-	{
-		Helper_Quantize( EA,   (float)rPS0(_inst.RS), stType, stScale );
-	}
-}
-
-void CInterpreter::psq_stu(UGeckoInstruction _inst)
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
-	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
-	const unsigned int stScale = gqr.ST_SCALE;
-	const u32 EA = m_GPR[_inst.RA] + _inst.SIMM_12;
-
-	int c = 4;
-	if ((stType == 4) || (stType == 6)) c = 0x1;
-	if ((stType == 5) || (stType == 7)) c = 0x2;
-
-	if (_inst.W == 0)
-	{
-		Helper_Quantize(EA,   (float)rPS0(_inst.RS), stType, stScale);
-		Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale);
-	}
-	else
-	{
-		Helper_Quantize(EA,   (float)rPS0(_inst.RS), stType, stScale);
-	}
-	m_GPR[_inst.RA] = EA;
-}
-
-void CInterpreter::psq_lx(UGeckoInstruction _inst)
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
-	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
-	const unsigned int ldScale = gqr.LD_SCALE;
-	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + m_GPR[_inst.RB]) : m_GPR[_inst.RB];
-
-	int c = 4;
-	if ((ldType == 4) || (ldType == 6)) c = 0x1;
-	if ((ldType == 5) || (ldType == 7)) c = 0x2;
-
-	if (_inst.Wx == 0)
-	{
-		rPS0(_inst.RS) = Helper_Dequantize( EA,   ldType, ldScale );
-		rPS1(_inst.RS) = Helper_Dequantize( EA+c, ldType, ldScale );
-	}
-	else
-	{
-		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
-		rPS1(_inst.RS) = 1.0f;
-	}
-}
-
-void CInterpreter::psq_stx(UGeckoInstruction _inst)
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
-	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
-	const unsigned int stScale = gqr.ST_SCALE;
-	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + m_GPR[_inst.RB]) : m_GPR[_inst.RB];
-
-	int c = 4;
-	if ((stType == 4) || (stType == 6)) c = 0x1;
-	if ((stType == 5) || (stType == 7)) c = 0x2;
-
-	if (_inst.Wx == 0)
-	{
-		Helper_Quantize(EA,   (float)rPS0(_inst.RS), stType, stScale);
-		Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale);
-	}
-	else
-	{
-		Helper_Quantize(EA,   (float)rPS0(_inst.RS), stType, stScale);
-	}
-}
-
-void CInterpreter::psq_lux(UGeckoInstruction _inst)
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
-	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
-	const unsigned int ldScale = gqr.LD_SCALE;
-	const u32 EA = m_GPR[_inst.RA] + m_GPR[_inst.RB];
-
-	int c = 4;
-	if ((ldType == 4) || (ldType == 6)) c = 0x1;
-	if ((ldType == 5) || (ldType == 7)) c = 0x2;
-
-	if (_inst.Wx == 0)
-	{
-		rPS0(_inst.RS) = Helper_Dequantize( EA,   ldType, ldScale );
-		rPS1(_inst.RS) = Helper_Dequantize( EA+c, ldType, ldScale );
-	}
-	else
-	{
-		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
-		rPS1(_inst.RS) = 1.0f;
-	}
-	m_GPR[_inst.RA] = EA;
-}
-
-void CInterpreter::psq_stux(UGeckoInstruction _inst)
-{
-	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
-	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
-	const unsigned int stScale = gqr.ST_SCALE;
-	const u32 EA = m_GPR[_inst.RA] + m_GPR[_inst.RB];
-
-	int c = 4;
-	if ((stType == 4) || (stType == 6)) c = 0x1;
-	if ((stType == 5) || (stType == 7)) c = 0x2;
-
-	if (_inst.Wx == 0)
-	{
-		Helper_Quantize(EA,   (float)rPS0(_inst.RS), stType, stScale);
-		Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale);
-	}
-	else
-	{
-		Helper_Quantize(EA,   (float)rPS0(_inst.RS), stType, stScale);
-	}
-	m_GPR[_inst.RA] = EA;
-}
-
-void CInterpreter::ps_div(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB);
-	rPS1(_inst.FD) = rPS1(_inst.FA) / rPS1(_inst.FB);
-}
-
-void CInterpreter::ps_sub(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
-	rPS1(_inst.FD) = rPS1(_inst.FA) - rPS1(_inst.FB);
-}
-
-void CInterpreter::ps_add(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
-	rPS1(_inst.FD) = rPS1(_inst.FA) + rPS1(_inst.FB);
-}
-
+// These "binary instructions" do not alter FPSCR.
 void CInterpreter::ps_sel(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = (rPS0(_inst.FA) >= 0.0f) ? rPS0(_inst.FC) : rPS0(_inst.FB);
-	rPS1(_inst.FD) = (rPS1(_inst.FA) >= 0.0f) ? rPS1(_inst.FC) : rPS1(_inst.FB);
-}
-
-void CInterpreter::ps_res(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = 1.0f / rPS0(_inst.FB);
-	rPS1(_inst.FD) = 1.0f / rPS1(_inst.FB);
-}
-
-void CInterpreter::ps_mul(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = rPS0(_inst.FA) * rPS0(_inst.FC);
-	rPS1(_inst.FD) = rPS1(_inst.FA) * rPS1(_inst.FC);
-}
-
-void CInterpreter::ps_rsqrte(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = 1.0 / sqrt(rPS0(_inst.FB));
-	rPS1(_inst.FD) = 1.0 / sqrt(rPS1(_inst.FB));
-}
-
-void CInterpreter::ps_msub(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
-	rPS1(_inst.FD) = (rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB);
-}
-
-void CInterpreter::ps_madd(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
-	rPS1(_inst.FD) = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
-}
-
-void CInterpreter::ps_nmsub(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = -(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB));
-	rPS1(_inst.FD) = -(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB));
-}
-
-void CInterpreter::ps_nmadd(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = -(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB));
-	rPS1(_inst.FD) = -(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB));
+	rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) >= -0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB));
+	rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) >= -0.0) ? rPS1(_inst.FC) : rPS1(_inst.FB));
 }

 void CInterpreter::ps_neg(UGeckoInstruction _inst)
@ -521,89 +50,7 @@ void CInterpreter::ps_abs(UGeckoInstruction _inst)
 	riPS1(_inst.FD) = riPS1(_inst.FB) &~ (1ULL << 63); 
 }

-void CInterpreter::ps_sum0(UGeckoInstruction _inst)
-{
-	double p0 = rPS0(_inst.FA) + rPS1(_inst.FB);
-	double p1 = rPS1(_inst.FC);
-	rPS0(_inst.FD) = p0;
-	rPS1(_inst.FD) = p1;
-}
-
-void CInterpreter::ps_sum1(UGeckoInstruction _inst)
-{
-	double p0 = rPS0(_inst.FC);
-	double p1 = rPS0(_inst.FA) + rPS1(_inst.FB);
-	rPS0(_inst.FD) = p0;
-	rPS1(_inst.FD) = p1;
-}
-
-void CInterpreter::ps_muls0(UGeckoInstruction _inst)
-{
-	double p0 = rPS0(_inst.FA) * rPS0(_inst.FC);
-	double p1 = rPS1(_inst.FA) * rPS0(_inst.FC);
-	rPS0(_inst.FD) = p0;
-	rPS1(_inst.FD) = p1;
-}
-
-void CInterpreter::ps_muls1(UGeckoInstruction _inst)
-{
-	double p0 = rPS0(_inst.FA) * rPS1(_inst.FC);
-	double p1 = rPS1(_inst.FA) * rPS1(_inst.FC);
-	rPS0(_inst.FD) = p0;
-	rPS1(_inst.FD) = p1;
-}
-
-void CInterpreter::ps_madds0(UGeckoInstruction _inst)
-{
-	double p0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
-	double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB);
-	rPS0(_inst.FD) = p0;
-	rPS1(_inst.FD) = p1;
-}
-
-void CInterpreter::ps_madds1(UGeckoInstruction _inst)
-{
-	double p0 = (rPS0(_inst.FA) * rPS1(_inst.FC)) + rPS0(_inst.FB);
-	double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
-	rPS0(_inst.FD) = p0;
-	rPS1(_inst.FD) = p1;
-}
-
-void CInterpreter::ps_cmpu0(UGeckoInstruction _inst)
-{
-	double fa = rPS0(_inst.FA);
-	double fb = rPS0(_inst.FB);
-	int compareResult;
-	if (fa < fb)		compareResult = 8; 
-	else if (fa > fb) 	compareResult = 4; 
-	else				compareResult = 2;
-	SetCRField(_inst.CRFD, compareResult);
-}
-
-void CInterpreter::ps_cmpo0(UGeckoInstruction _inst)
-{
-	// for now HACK
-	ps_cmpu0(_inst);
-}
-
-void CInterpreter::ps_cmpu1(UGeckoInstruction _inst)
-{
-	double fa = rPS1(_inst.FA);
-	double fb = rPS1(_inst.FB);
-	int compareResult;
-	if (fa < fb)		compareResult = 8; 
-	else if (fa > fb)	compareResult = 4; 
-	else				compareResult = 2;
-
-	SetCRField(_inst.CRFD, compareResult);
-}
-
-void CInterpreter::ps_cmpo1(UGeckoInstruction _inst)
-{
-	// for now HACK
-	ps_cmpu1(_inst);
-}
-
+// These are just moves, double is OK.
 void CInterpreter::ps_merge00(UGeckoInstruction _inst)
 {
 	double p0 = rPS0(_inst.FA);
@ -636,6 +83,159 @@ void CInterpreter::ps_merge11(UGeckoInstruction _inst)
 	rPS1(_inst.FD) = p1;
 }

+
+// From here on, the real deal.
+
+// ps_div: per-slot division, FD = FA / FB, each quotient narrowed to float.
+// Clears FPSCR.FI and sets FPSCR.ZX when either divisor is +/-0.
+void CInterpreter::ps_div(UGeckoInstruction _inst)
+{
+	// Sample the divisors before FD is written: FD may name the same register
+	// as FB, in which case testing FB afterwards would look at the quotient.
+	// Both slots are checked, not just PS0.
+	const bool zeroDivisor = (fabs(rPS0(_inst.FB)) == 0.0) || (fabs(rPS1(_inst.FB)) == 0.0);
+
+	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB));
+	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) / rPS1(_inst.FB));
+	FPSCR.FI = 0;
+	if (zeroDivisor) {
+		FPSCR.ZX = 1;
+	}
+}
+
+// ps_sub: per-slot subtraction, FD = FA - FB; each difference is narrowed to float before it is stored.
+void CInterpreter::ps_sub(UGeckoInstruction _inst)
+{
+	const float diff0 = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
+	rPS0(_inst.FD) = diff0;
+
+	const float diff1 = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
+	rPS1(_inst.FD) = diff1;
+}
+
+// ps_add: per-slot addition, FD = FA + FB; each sum is narrowed to float before it is stored.
+void CInterpreter::ps_add(UGeckoInstruction _inst)
+{
+	const float sum0 = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
+	rPS0(_inst.FD) = sum0;
+
+	const float sum1 = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
+	rPS1(_inst.FD) = sum1;
+}
+
+// ps_res: per-slot reciprocal, FD = 1 / FB, with the operand narrowed to float before the division.
+void CInterpreter::ps_res(UGeckoInstruction _inst)
+{
+	const float operand0 = static_cast<float>(rPS0(_inst.FB));
+	rPS0(_inst.FD) = 1.0f / operand0;
+
+	const float operand1 = static_cast<float>(rPS1(_inst.FB));
+	rPS1(_inst.FD) = 1.0f / operand1;
+}
+
+// ps_mul: per-slot multiplication, FD = FA * FC; each product is narrowed to float before it is stored.
+void CInterpreter::ps_mul(UGeckoInstruction _inst)
+{
+	const float product0 = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
+	rPS0(_inst.FD) = product0;
+
+	const float product1 = static_cast<float>(rPS1(_inst.FA) * rPS1(_inst.FC));
+	rPS1(_inst.FD) = product1;
+}
+
+// ps_rsqrte: per-slot reciprocal square root, FD = 1 / sqrt(FB), computed in single precision.
+// Sets FPSCR.ZX when either operand is +/-0.
+void CInterpreter::ps_rsqrte(UGeckoInstruction _inst)
+{
+	// Sample the operands before FD is written: FD may name the same register
+	// as FB, in which case testing FB afterwards would look at the result.
+	// Both slots are checked, not just PS0.
+	const bool zeroOperand = (fabs(rPS0(_inst.FB)) == 0.0) || (fabs(rPS1(_inst.FB)) == 0.0);
+
+	rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB)));
+	rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
+	if (zeroOperand) {
+		FPSCR.ZX = 1;
+	}
+}
+
+// ps_msub: per-slot multiply-subtract, FD = (FA * FC) - FB; each result is narrowed to float.
+void CInterpreter::ps_msub(UGeckoInstruction _inst)
+{
+	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
+	rPS0(_inst.FD) = result0;
+
+	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB));
+	rPS1(_inst.FD) = result1;
+}
+
+// ps_madd: per-slot multiply-add, FD = (FA * FC) + FB; each result is narrowed to float.
+void CInterpreter::ps_madd(UGeckoInstruction _inst)
+{
+	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
+	rPS0(_inst.FD) = result0;
+
+	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB));
+	rPS1(_inst.FD) = result1;
+}
+
+// ps_nmsub: per-slot negated multiply-subtract, FD = -((FA * FC) - FB); each result is narrowed to float.
+void CInterpreter::ps_nmsub(UGeckoInstruction _inst)
+{
+	const float result0 = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB)));
+	rPS0(_inst.FD) = result0;
+
+	const float result1 = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB)));
+	rPS1(_inst.FD) = result1;
+}
+
+// ps_nmadd: per-slot negated multiply-add, FD = -((FA * FC) + FB); each result is narrowed to float.
+void CInterpreter::ps_nmadd(UGeckoInstruction _inst)
+{
+	const float result0 = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB)));
+	rPS0(_inst.FD) = result0;
+
+	const float result1 = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB)));
+	rPS1(_inst.FD) = result1;
+}
+
+// ps_sum0: FD.ps0 = FA.ps0 + FB.ps1, FD.ps1 = FC.ps1; both values are narrowed to float.
+// Both sources are read before FD is written, so FD may alias any source register.
+void CInterpreter::ps_sum0(UGeckoInstruction _inst)
+{
+	const float sum = static_cast<float>(rPS0(_inst.FA) + rPS1(_inst.FB));
+	const float carriedOver = static_cast<float>(rPS1(_inst.FC));
+
+	rPS0(_inst.FD) = sum;
+	rPS1(_inst.FD) = carriedOver;
+}
+
+// ps_sum1: FD.ps0 = FC.ps0, FD.ps1 = FA.ps0 + FB.ps1; both values are narrowed to float.
+// Both sources are read before FD is written, so FD may alias any source register.
+void CInterpreter::ps_sum1(UGeckoInstruction _inst)
+{
+	const float carriedOver = static_cast<float>(rPS0(_inst.FC));
+	const float sum = static_cast<float>(rPS0(_inst.FA) + rPS1(_inst.FB));
+
+	rPS0(_inst.FD) = carriedOver;
+	rPS1(_inst.FD) = sum;
+}
+
+// ps_muls0: both slots of FA are multiplied by FC.ps0; each product is narrowed to float.
+// Both products are formed before FD is written, so FD may alias FA or FC.
+void CInterpreter::ps_muls0(UGeckoInstruction _inst)
+{
+	const float product0 = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
+	const float product1 = static_cast<float>(rPS1(_inst.FA) * rPS0(_inst.FC));
+
+	rPS0(_inst.FD) = product0;
+	rPS1(_inst.FD) = product1;
+}
+
+// ps_muls1: both slots of FA are multiplied by FC.ps1; each product is narrowed to float.
+// Both products are formed before FD is written, so FD may alias FA or FC.
+void CInterpreter::ps_muls1(UGeckoInstruction _inst)
+{
+	const float product0 = static_cast<float>(rPS0(_inst.FA) * rPS1(_inst.FC));
+	const float product1 = static_cast<float>(rPS1(_inst.FA) * rPS1(_inst.FC));
+
+	rPS0(_inst.FD) = product0;
+	rPS1(_inst.FD) = product1;
+}
+
+// ps_madds0: FD = (FA * FC.ps0) + FB per slot; each result is narrowed to float.
+// Both results are formed before FD is written, so FD may alias any source register.
+void CInterpreter::ps_madds0(UGeckoInstruction _inst)
+{
+	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
+	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB));
+
+	rPS0(_inst.FD) = result0;
+	rPS1(_inst.FD) = result1;
+}
+
+// ps_madds1: FD = (FA * FC.ps1) + FB per slot; each result is narrowed to float.
+// Both results are formed before FD is written, so FD may alias any source register.
+void CInterpreter::ps_madds1(UGeckoInstruction _inst)
+{
+	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS1(_inst.FC)) + rPS0(_inst.FB));
+	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB));
+
+	rPS0(_inst.FD) = result0;
+	rPS1(_inst.FD) = result1;
+}
+
+// ps_cmpu0: compares FA.ps0 with FB.ps0 (both narrowed to float) and writes the outcome
+// to CR field CRFD: 8 = less than, 4 = greater than, 2 = equal, 1 = unordered (NaN operand).
+void CInterpreter::ps_cmpu0(UGeckoInstruction _inst)
+{
+	float fa = rPS0(_inst.FA);
+	float fb = rPS0(_inst.FB);
+	int compareResult;
+	// A NaN compares unequal to itself. Without this test a NaN operand would fall
+	// through both orderings below and be reported as "equal".
+	if (fa != fa || fb != fb)	compareResult = 1;
+	else if (fa < fb)	compareResult = 8; 
+	else if (fa > fb) 	compareResult = 4; 
+	else				compareResult = 2;
+	SetCRField(_inst.CRFD, compareResult);
+}
+
+// ps_cmpo0: currently implemented by forwarding to ps_cmpu0, so it produces exactly
+// the same CR field result and does nothing beyond what ps_cmpu0 does.
+void CInterpreter::ps_cmpo0(UGeckoInstruction _inst)
+{
+	// for now HACK
+	ps_cmpu0(_inst);
+}
+
+// ps_cmpu1: compares FA.ps1 with FB.ps1 (both narrowed to float) and writes the outcome
+// to CR field CRFD: 8 = less than, 4 = greater than, 2 = equal, 1 = unordered (NaN operand).
+void CInterpreter::ps_cmpu1(UGeckoInstruction _inst)
+{
+	float fa = rPS1(_inst.FA);
+	float fb = rPS1(_inst.FB);
+	int compareResult;
+	// A NaN compares unequal to itself. Without this test a NaN operand would fall
+	// through both orderings below and be reported as "equal".
+	if (fa != fa || fb != fb)	compareResult = 1;
+	else if (fa < fb)	compareResult = 8; 
+	else if (fa > fb)	compareResult = 4; 
+	else				compareResult = 2;
+
+	SetCRField(_inst.CRFD, compareResult);
+}
+
+// ps_cmpo1: currently implemented by forwarding to ps_cmpu1, so it produces exactly
+// the same CR field result and does nothing beyond what ps_cmpu1 does.
+void CInterpreter::ps_cmpo1(UGeckoInstruction _inst)
+{
+	// for now HACK
+	ps_cmpu1(_inst);
+}
+
 // __________________________________________________________________________________________________
 // dcbz_l
 // TODO(ector) check docs
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@ -34,12 +34,22 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
 #include "../../Core.h"
 #include "Interpreter.h"

+/*

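+Addresses at which mffsx was seen (same "mffsx: %08x" format that PPCTables::LogCompiledInstructions writes).
+Most of these occur together with fctiwx: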
+mffsx: 800c3624
+mffsx: 80043c98
+mffsx: 8003dd48
+mffsx: 8003dd9c
+mffsx: 80036608
+mffsx: 80036650 (huh?)
+
+*/
 // TODO(ector): More proper handling of SSE state.
 // That is, set rounding mode etc when entering jit code or the interpreter loop
 // Restore rounding mode when calling anything external

-void UpdateSSEState(int round, bool daz)
+void UpdateSSEState()
 {
 	u32 csr = _mm_getcsr();
 	
@ -51,14 +61,14 @@ void UpdateSSEState(int round, bool daz)
 		1,
 	};
 	csr = csr & 0x9FFF;
-	csr |= ssetable[round] << 13;
+	csr |= ssetable[FPSCR.RN] << 13;

 	// Also handle denormals as zero (FZ + DAZ)
 	csr &= ~0x8020;

-	// SETTING DAZ KILLS BEYOND GOOD AND EVIL
-	// if (daz)
-	//	csr |= 0x8020;
+	// SETTING FTZ+DAZ KILLS BEYOND GOOD AND EVIL
+	//if (daz)
+	//	csr |= 0x20; // Only set DAZ  //0x8020;
 	
 	_mm_setcsr(csr);
 }
@ -72,7 +82,6 @@ void RestoreSSEState()
 void UpdateFPSCR(UReg_FPSCR fp)
 {
 	// Set FPU rounding mode to mimic the PowerPC's
-	int round = fp.RN;
 #ifdef _M_IX86
 	// This shouldn't really be needed anymore since we use SSE
 #ifdef _WIN32
@ -83,7 +92,7 @@ void UpdateFPSCR(UReg_FPSCR fp)
 		_RC_UP,
 		_RC_DOWN
 	};
-	_set_controlfp(_MCW_RC, table[round]);
+	_set_controlfp(_MCW_RC, table[fp.RN]);
 #else
 	const unsigned short table[4] = 
 	{
@ -94,19 +103,48 @@ void UpdateFPSCR(UReg_FPSCR fp)
 	};
 	unsigned short mode;
 	asm ("fstcw %0" : : "m" (mode));
-	mode = (mode & ~FPU_ROUND_MASK) | table[round];
+	mode = (mode & ~FPU_ROUND_MASK) | table[fp.RN];
 	asm ("fldcw %0" : : "m" (mode));
 #endif
 #endif
+	if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
+	{
+		PanicAlert("FPSCR - exceptions enabled. Please report.");
+	}
+
 	// Also corresponding SSE rounding mode setting
-	UpdateSSEState(round, fp.NI ? true : false);
+	UpdateSSEState();
 }

 void CInterpreter::mcrfs(UGeckoInstruction _inst)
 {
-	// TODO(ector): check a ppc manual for this one
 	u32 fpflags = ((FPSCR.Hex >> (4*(_inst.CRFS))) & 0xF);
-	FPSCR.Hex &= ~(0xF0000000 >> (_inst.CRFS*4));
+	switch (_inst.CRFS) {
+	case 0:
+		FPSCR.FX = 0;
+		FPSCR.OX = 0;
+		break;
+	case 1:
+		FPSCR.UX = 0;
+		FPSCR.ZX = 0;
+		FPSCR.XX = 0;
+		FPSCR.VXSNAN = 0;
+		break;
+	case 2:
+		FPSCR.VXISI = 0;
+		FPSCR.VXIDI = 0;
+		FPSCR.VXZDZ = 0;
+		FPSCR.VXIMZ = 0;
+		break;
+	case 3:
+		FPSCR.VXVC = 0;
+		break;
+	case 5:
+		FPSCR.VXSOFT = 0;
+		FPSCR.VXSQRT = 0;
+		FPSCR.VXCVI = 0;
+		break;
+	}
 	SetCRField(_inst.CRFD, fpflags);
 	UpdateFPSCR(FPSCR);
 }
@ -127,8 +165,6 @@ void CInterpreter::mcrfs(UGeckoInstruction _inst)
 #define MXCSR_ROUND (16384|8192)
 #define MXCSR_FLUSH 32768

-
-
 void CInterpreter::mffsx(UGeckoInstruction _inst)
 {
 	// load from FPSCR
@ -136,31 +172,35 @@ void CInterpreter::mffsx(UGeckoInstruction _inst)
 	// TODO(ector): grab all overflow flags etc and set them in FPSCR

 	riPS0(_inst.FD)	= (u64)FPSCR.Hex;
+	if (_inst.Rc) PanicAlert("mffsx: inst_.Rc");
 }

+// mtfsb0x: clears one bit of FPSCR. The bit is selected by CRBD, counted from the most
+// significant bit (mask 0x80000000 >> CRBD). The new FPSCR is then pushed to the host
+// FPU state through UpdateFPSCR.
 void CInterpreter::mtfsb0x(UGeckoInstruction _inst)
 {
 	FPSCR.Hex &= (~(0x80000000 >> _inst.CRBD));
 	UpdateFPSCR(FPSCR);
+	// The record (Rc) form is not handled here; raise an alert if it ever shows up.
+	if (_inst.Rc) PanicAlert("mtfsb0x: inst_.Rc");
 }

+// mtfsb1x: sets one bit of FPSCR. The bit is selected by CRBD, counted from the most
+// significant bit (mask 0x80000000 >> CRBD). The new FPSCR is then pushed to the host
+// FPU state through UpdateFPSCR.
 void CInterpreter::mtfsb1x(UGeckoInstruction _inst)
 {
 	FPSCR.Hex |= 0x80000000 >> _inst.CRBD;
 	UpdateFPSCR(FPSCR);
+	// The record (Rc) form is not handled here; raise an alert if it ever shows up.
+	if (_inst.Rc) PanicAlert("mtfsb1x: inst_.Rc");
 }

 void CInterpreter::mtfsfix(UGeckoInstruction _inst)
 {
-	u32 mask = (0xF0000000 >> (4*_inst.CRFD));
+	u32 mask = (0xF0000000 >> (4 * _inst.CRFD));
 	u32 imm = (_inst.hex << 16) & 0xF0000000;
-	FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4*_inst.CRFD));
+	FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD));
 	UpdateFPSCR(FPSCR);
+	if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc");
 }

 void CInterpreter::mtfsfx(UGeckoInstruction _inst)
 {
-	u32 fm	= _inst.FM;
+	u32 fm = _inst.FM;
 	u32 m = 0;
 	for (int i = 0; i < 8; i++) {  //7?? todo check
 		if (fm & (1 << i))
@ -169,6 +209,7 @@ void CInterpreter::mtfsfx(UGeckoInstruction _inst)

 	FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
 	UpdateFPSCR(FPSCR);
+	if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc");
 }

 void CInterpreter::mcrxr(UGeckoInstruction _inst)
@ -240,7 +281,7 @@ void CInterpreter::mtsrin(UGeckoInstruction _inst)

 void CInterpreter::mftb(UGeckoInstruction _inst)
 {
-	int iIndex = (_inst.TBR >> 5) | ((_inst.TBR&0x1F) << 5);
+	int iIndex = (_inst.TBR >> 5) | ((_inst.TBR & 0x1F) << 5);
 	if (iIndex == 268)		m_GPR[_inst.RD] = TL;
 	else if (iIndex == 269)	m_GPR[_inst.RD] = TU;
 	else					_dbg_assert_(GEKKO,0);
@ -449,4 +490,3 @@ void CInterpreter::isync(UGeckoInstruction _inst)
 {
 	//shouldnt do anything
 }
-
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
@ -231,6 +231,21 @@ namespace Jit64
 	JitState js;
 	JitOptions jo;

+	// Fills the global JitOptions instance (jo) with the default JIT configuration.
+	void Init()
+	{
+		// Fast memory access follows the startup parameter on x64 builds only;
+		// every other build has it switched off.
+#ifdef _M_X64
+		jo.enableFastMem = Core::GetStartupParameter().bUseFastMem;
+#else
+		jo.enableFastMem = false;
+#endif
+
+		// Options that are enabled by default.
+		jo.optimizeStack = true;
+		jo.enableBlocklink = true;  // Speed boost, but not 100% safe
+		jo.assumeFPLoadFromMem = true;
+		jo.fpAccurateFlags = true;
+		jo.optimizeGatherPipe = true;
+
+		// When set, instructions for which PPCTables::UsesFPU is true are compiled
+		// through Default() instead of their JIT handler.
+		jo.interpretFPU = false;
+	}
+
 	void WriteCallInterpreter(UGeckoInstruction _inst)
 	{
 		gpr.Flush(FLUSH_ALL);
@ -263,12 +278,6 @@ namespace Jit64
 		// Yup, just don't do anything.
 	}

-	// RESULTS (running kururin with optimizations on)
-	// at block 13968 they diverge.
-	// linux goes to 8010fe54
-	// windoze goes to 8010feb0
-	// after they they are completely out of sync.
-    // branches from the cmp result of r0, which comes from an lbz (loaded from stack)
 	static const bool ImHereDebug = false;
 	static const bool ImHereLog = false;
 	static std::map<u32, int> been_here;
@ -403,7 +412,12 @@ namespace Jit64
 			js.op = &ops[i];
 			js.instructionNumber = i;
 			if (i == (int)size - 1) js.isLastInstruction = true;
+			
+			// const GekkoOpInfo *info = GetOpInfo();
 			// if (js.isLastInstruction)
+			if (jo.interpretFPU && PPCTables::UsesFPU(ops[i].inst))
+				Default(ops[i].inst);
+			else
 				PPCTables::CompileInstruction(ops[i].inst);
 			// else
 			// 	Default(ops[i].inst);
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
@ -70,11 +70,14 @@ namespace Jit64
 		bool fpAccurateFlags;
 		bool enableFastMem;
 		bool optimizeGatherPipe;
+		bool interpretFPU;
 	};

 	extern JitState js;
 	extern JitOptions jo;

+	void Init();
+
 	void Default(UGeckoInstruction _inst);
 	void DoNothing(UGeckoInstruction _inst);
 	
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp
@ -76,17 +76,6 @@ namespace Jit64

 	void InitCache()
 	{
-		jo.optimizeStack = true;
-		jo.enableBlocklink = true;  // Speed boost, but not 100% safe
-#ifdef _M_X64
-		jo.enableFastMem = Core::GetStartupParameter().bUseFastMem;
-#else
-		jo.enableFastMem = false;
-#endif
-		jo.assumeFPLoadFromMem = true;
-		jo.fpAccurateFlags = true;
-		jo.optimizeGatherPipe = true;
-
 		codeCache    = (u8*)AllocateExecutableMemory(CODE_SIZE);
 		genFunctions = (u8*)AllocateExecutableMemory(GEN_SIZE);
 		trampolineCache = (u8*)AllocateExecutableMemory(TRAMPOLINE_SIZE);
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitCore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCore.cpp
@ -17,6 +17,7 @@
 #include "JitCore.h"
 #include "JitCache.h"
 #include "JitAsm.h"
+#include "Jit.h"

 #include "../../HW/Memmap.h"
 #include "../../HW/CPU.h"
@ -31,6 +32,7 @@ namespace Jit64
 {
+	// Initializes the JIT core: sets the JIT option defaults, sets up the code cache,
+	// and copies the "run compare client" startup flag into Asm::compareEnabled.
 	void Jit64Core::Init()
 	{
+		// Option defaults are set before InitCache() runs.
+		::Jit64::Init();
 		InitCache();
 		Asm::compareEnabled = Core::g_CoreStartupParameter.bRunCompareClient;
 	}
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
@ -122,9 +122,9 @@ namespace Jit64
 		bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0;
 		bool ctrDecremented = false;

-		if ((inst.BO & 16) == 0)  // Test CR with a combination of bits
+		if ((inst.BO & 16) == 0)  // Test a CR bit
 		{
-			TEST(32, M(&CR), Imm32(0x80000000>>inst.BI));
+			TEST(32, M(&CR), Imm32(0x80000000 >> inst.BI));
 			if (inst.BO & 8)  // Conditional branch 
 				branch = CC_NZ;
 			else
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp
@ -150,7 +150,6 @@ namespace Jit64
 		fpr.UnlockAll();
 	}
 	
-
 	void fmrx(UGeckoInstruction inst)
 	{
 		INSTRUCTION_START;
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp
@ -25,6 +25,7 @@
 #include "Jit.h"
 #include "JitCache.h"
 #include "JitRegCache.h"
+#include "Jit_Util.h"

 // TODO
 // ps_madds0
@ -198,7 +199,7 @@ namespace Jit64
 			op(XMM0, Gen::R(XMM1));
 			MOVAPD(fpr.RX(d), Gen::R(XMM0));
 		}
-		//fpr.SetDirty(fpr.RX(d));
+		ForceSinglePrecisionP(fpr.RX(d));
 		fpr.UnlockAll();
 	}

@ -308,6 +309,7 @@ namespace Jit64
 		}
 		fpr.LoadToX64(d, false);
 		MOVAPD(fpr.RX(d), Gen::R(XMM0));
+		ForceSinglePrecisionP(fpr.RX(d));
 		fpr.UnlockAll();
 	}

--- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp
+++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp
@ -19,6 +19,7 @@

 #include "Common.h"
 #include "PPCTables.h"
+#include "StringUtil.h"
 #include "Interpreter/Interpreter.h"

 #if defined(_M_IX86) || defined(_M_X64)
@ -409,7 +410,7 @@ GekkoOPTemplate table59[] =
 	{18, CInterpreter::fdivsx,   Jit64::fp_arith_s,    {"fdivsx",   OPTYPE_FPU, FL_RC_BIT_F, 16}}, 
 	{20, CInterpreter::fsubsx,   Jit64::fp_arith_s,    {"fsubsx",   OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{21, CInterpreter::faddsx,   Jit64::fp_arith_s,    {"faddsx",   OPTYPE_FPU, FL_RC_BIT_F}}, 
-	{22, CInterpreter::fsqrtsx,  Jit64::Default,       {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, 
+//	{22, CInterpreter::fsqrtsx,  Jit64::Default,       {"fsqrtsx",  OPTYPE_FPU, FL_RC_BIT_F}}, // Not implemented on gekko
 	{24, CInterpreter::fresx,    Jit64::Default,       {"fresx",    OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{25, CInterpreter::fmulsx,   Jit64::fp_arith_s,    {"fmulsx",   OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{28, CInterpreter::fmsubsx,  Jit64::fmaddXX,       {"fmsubsx",  OPTYPE_FPU, FL_RC_BIT_F}}, 
@ -637,11 +638,26 @@ void PPCTables::InitTables()
 		m_allInstructions[m_numInstructions++] = &table63[i].opinfo;
 	for (int i = 0; i < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); i++)
 		m_allInstructions[m_numInstructions++] = &table63_2[i].opinfo;
+	if (m_numInstructions >= 2048) {
+		PanicAlert("m_allInstructions underdimensioned");
+	}
+}
+
+// File-local debugging state.
+namespace {
+	// Value of Jit64::js.compilerPC recorded each time an instruction whose opname is
+	// "mffsx" goes through CompileInstruction; dumped by LogCompiledInstructions.
+	std::vector<u32> rsplocations;
 }

+// Dispatches _inst to the handler registered in dynaOpTable for its primary opcode,
+// then updates the per-instruction compile statistics (compileCount, lastUse).
 void PPCTables::CompileInstruction(UGeckoInstruction _inst)
 {
 	dynaOpTable[_inst.OPCD](_inst);	
+	// GetOpInfo's result is null-checked: statistics are only kept when an info entry exists.
+	GekkoOPInfo *info = GetOpInfo(_inst);
+	if (info) {
+		// Debug aid: remember every address at which mffsx gets compiled.
+		if (!strcmp(info->opname, "mffsx")) {
+			rsplocations.push_back(Jit64::js.compilerPC);
+		}
+		info->compileCount++;
+		info->lastUse = Jit64::js.compilerPC;
+	}
 }

 bool PPCTables::IsValidInstruction(UGeckoInstruction _instCode)
@ -685,3 +701,30 @@ void PPCTables::PrintInstructionRunCounts()
        LOG(GEKKO, "%s : %i", temp[i].name,temp[i].count);
 	}
 }
+
+// Writes three text files into the current working directory:
+//   inst_log<N>.txt - every instruction with compileCount > 0: name, compile count, run count, last use address
+//   inst_not<N>.txt - every instruction with compileCount == 0: name, compile count, run count
+//   rsp_at.txt      - every address recorded in rsplocations (overwritten on each call)
+// <N> starts at 0 and grows by one per call. A file that cannot be opened is reported
+// with PanicAlert and skipped; the remaining files are still written.
+void PPCTables::LogCompiledInstructions()
+{
+	// Named logIndex rather than "time" so it does not shadow the C library function.
+	static int logIndex = 0;
+
+	FILE *f = fopen(StringFromFormat("inst_log%i.txt", logIndex).c_str(), "w");
+	if (f)
+	{
+		for (int i = 0; i < m_numInstructions; i++)
+		{
+			if (m_allInstructions[i]->compileCount > 0) {
+				fprintf(f, "%s\t%i\t%i\t%08x\n", m_allInstructions[i]->opname, m_allInstructions[i]->compileCount, m_allInstructions[i]->runCount, m_allInstructions[i]->lastUse);
+			}
+		}
+		fclose(f);
+	}
+	else
+	{
+		PanicAlert("LogCompiledInstructions: could not open inst_log file");
+	}
+
+	f = fopen(StringFromFormat("inst_not%i.txt", logIndex).c_str(), "w");
+	if (f)
+	{
+		for (int i = 0; i < m_numInstructions; i++)
+		{
+			if (m_allInstructions[i]->compileCount == 0) {
+				fprintf(f, "%s\t%i\t%i\n", m_allInstructions[i]->opname, m_allInstructions[i]->compileCount, m_allInstructions[i]->runCount);
+			}
+		}
+		fclose(f);
+	}
+	else
+	{
+		PanicAlert("LogCompiledInstructions: could not open inst_not file");
+	}
+
+	// The file name has no conversion specifier, so it is passed to fopen directly
+	// (the original handed an unused argument to StringFromFormat). The name is unchanged.
+	f = fopen("rsp_at.txt", "w");
+	if (f)
+	{
+		for (size_t i = 0; i < rsplocations.size(); i++) {
+			fprintf(f, "mffsx: %08x\n", rsplocations[i]);
+		}
+		fclose(f);
+	}
+	else
+	{
+		PanicAlert("LogCompiledInstructions: could not open rsp_at.txt");
+	}
+
+	logIndex++;
+}
--- a/Source/Core/Core/Src/PowerPC/PPCTables.h
+++ b/Source/Core/Core/Src/PowerPC/PPCTables.h
@ -73,6 +73,8 @@ struct GekkoOPInfo
 	int flags;
 	int numCyclesMinusOne;
 	int runCount;
+	int compileCount;
+	u32 lastUse;
 };


@ -92,6 +94,7 @@ public:

 	static void CountInstruction(UGeckoInstruction _inst);
 	static void PrintInstructionRunCounts();
+	static void LogCompiledInstructions();

 	static void CompileInstruction(UGeckoInstruction _inst);
 };
--- a/Source/Core/Core/Src/SConscript
+++ b/Source/Core/Core/Src/SConscript
@ -63,6 +63,7 @@ files = ["Console.cpp",
         "PowerPC/Interpreter/Interpreter_FloatingPoint.cpp",
         "PowerPC/Interpreter/Interpreter_Paired.cpp",
         "PowerPC/Interpreter/Interpreter_LoadStore.cpp",
+         "PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp",
         "PowerPC/Interpreter/Interpreter_SystemRegisters.cpp",
         "PowerPC/Jit64/Jit.cpp",
         "PowerPC/Jit64/JitCore.cpp",
--- a/Source/Core/DebuggerWX/src/CodeWindow.cpp
+++ b/Source/Core/DebuggerWX/src/CodeWindow.cpp
@ -46,6 +46,7 @@
 #include "Debugger/PPCDebugInterface.h"
 #include "Debugger/Debugger_SymbolMap.h"
 #include "PowerPC/PPCAnalyst.h"
+#include "PowerPC/PPCTables.h"
 #include "PowerPC/Jit64/Jit.h"
 #include "PowerPC/Jit64/JitCache.h"

@ -71,6 +72,9 @@ BEGIN_EVENT_TABLE(CCodeWindow, wxFrame)
 	EVT_MENU(IDM_SCANFUNCTIONS,     CCodeWindow::OnSymbolsMenu)
 	EVT_MENU(IDM_LOADMAPFILE,       CCodeWindow::OnSymbolsMenu)
 	EVT_MENU(IDM_SAVEMAPFILE,       CCodeWindow::OnSymbolsMenu)
+
+	EVT_MENU(IDM_CLEARCODECACHE,    CCodeWindow::OnJitMenu)
+	EVT_MENU(IDM_LOGINSTRUCTIONS,   CCodeWindow::OnJitMenu)
 	// toolbar
 	EVT_MENU(IDM_DEBUG_GO,			CCodeWindow::OnCodeStep)
 	EVT_MENU(IDM_STEP,				CCodeWindow::OnCodeStep)
@ -244,6 +248,7 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam
 	{
 		wxMenu *pJitMenu = new wxMenu;
 		pJitMenu->Append(IDM_CLEARCODECACHE, _T("&Clear code cache"));
+		pJitMenu->Append(IDM_LOGINSTRUCTIONS, _T("&Log JIT instruction coverage"));
 		pMenuBar->Append(pJitMenu, _T("&JIT"));
 	}

@ -275,6 +280,9 @@ void CCodeWindow::OnJitMenu(wxCommandEvent& event)
 	case IDM_CLEARCODECACHE:
 		Jit64::ClearCache();
 		break;
+	case IDM_LOGINSTRUCTIONS:
+		PPCTables::LogCompiledInstructions();
+		break;
 	}
 }

--- a/Source/Core/DebuggerWX/src/CodeWindow.h
+++ b/Source/Core/DebuggerWX/src/CodeWindow.h
@ -80,6 +80,7 @@ class CCodeWindow
 			IDM_BREAKPOINTWINDOW,
 			IDM_MEMORYWINDOW,
 			IDM_SCANFUNCTIONS,
+			IDM_LOGINSTRUCTIONS,
 			IDM_LOADMAPFILE,
 			IDM_SAVEMAPFILE,
 			IDM_CLEARCODECACHE,
--- a/Source/Core/DolphinWX/src/Frame.cpp
+++ b/Source/Core/DolphinWX/src/Frame.cpp
@ -303,7 +303,7 @@ void CFrame::OnOpen(wxCommandEvent& WXUNUSED (event))
 			wxEmptyString, wxEmptyString, wxEmptyString,
 			wxString::Format
 			(
-					_T("Elf files (*.elf)|*.elf|DOL files (*.dol)|*.dol|Gamecube/Wii ISO (*.iso;*.gcm)|*.iso;*.gcm|All files (%s)|%s"),
+					_T("All GC/Wii files (elf, dol, gcm, iso)|*.elf;*.dol;*.gcm;*.iso|All files (%s)|%s"),
 					wxFileSelectorDefaultWildcardStr,
 					wxFileSelectorDefaultWildcardStr
 			),
--- a/Source/Core/DolphinWX/src/GameListCtrl.h
+++ b/Source/Core/DolphinWX/src/GameListCtrl.h
@ -68,7 +68,7 @@ class CGameListCtrl : public wxListCtrl
 		void OnEditPatchFile(wxCommandEvent& event);
 		void OnOpenContainingFolder(wxCommandEvent& event);

-		virtual bool MSWDrawSubItem(wxPaintDC& rPainDC, int item, int subitem);
+		virtual bool MSWDrawSubItem(wxPaintDC& rPaintDC, int item, int subitem);

 		void AutomaticColumnWidth();
 };
--- a/Source/Plugins/Plugin_DSP_HLE/Src/PCHW/Mixer.cpp
+++ b/Source/Plugins/Plugin_DSP_HLE/Src/PCHW/Mixer.cpp
@ -83,6 +83,7 @@ void Mixer_PushSamples(short *buffer, int num_stereo_samples, int sample_rate) {
 	static int PV1r=0,PV2r=0,PV3r=0,PV4r=0;
 	static int acc=0;

+	if (!GetAsyncKeyState(VK_TAB)) {
 	while (queue_size > queue_maxlength / 2) {
 #ifdef _WIN32
 		DSound::DSound_UpdateSound();
@ -91,7 +92,9 @@ void Mixer_PushSamples(short *buffer, int num_stereo_samples, int sample_rate) {
 		sleep(0);
 #endif
 	}
-
+	} else {
+		return;
+	}
 	//convert into config option?
 	const int mode = 2;

--- a/Source/Plugins/Plugin_VideoDX9/Src/OpcodeDecoding.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/OpcodeDecoding.cpp
@ -210,16 +210,6 @@ bool FifoCommandRunnable(void)

 void Decode(void)
 {
-    static int DecoderCount = 0;
-    DecoderCount++;
-
-    if (DecoderCount == 0x0019c601)
-    {
-        int i = 0;
-    }
-
-    // 0x0019c603  <- error 
-
    int Cmd = g_pDataReader->Read8();
 	switch(Cmd)
 	{
@ -236,7 +226,6 @@ void Decode(void)

 	case GX_LOAD_XF_REG:
 		{
-			u32 test = PeekFifo32(0);
 			u32 Cmd2 = g_pDataReader->Read32();
 			
 			int dwTransferSize = ((Cmd2>>16)&15) + 1;
--- a/Source/Plugins/Plugin_VideoOGL/Src/OpcodeDecoding.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/OpcodeDecoding.cpp
@ -145,23 +145,26 @@ bool FifoCommandRunnable(void)
    case GX_LOAD_XF_REG:
        {
            // check if we can read the header
-            if (iBufferSize >= 5) {				
+            if (iBufferSize >= 5)
+			{				
                iCommandSize = 1 + 4;
                u32 Cmd2 = PeekFifo32(1);
-                int dwTransferSize = ((Cmd2>>16)&15) + 1;
+                int dwTransferSize = ((Cmd2 >> 16) & 15) + 1;
                iCommandSize += dwTransferSize * 4;				
            }
-            else {
+            else
+			{
                return false;
            }			
        }
        break;

    default:
-        if (Cmd&0x80)
+        if (Cmd & 0x80)
        {				
            // check if we can read the header
-            if (iBufferSize >= 3) {
+            if (iBufferSize >= 3)
+			{
                iCommandSize = 1 + 2;
                u16 numVertices = PeekFifo16(1);
                VertexLoader& vtxLoader = g_VertexLoaders[Cmd & GX_VAT_MASK];
@ -189,7 +192,7 @@ bool FifoCommandRunnable(void)
    if (iCommandSize > iBufferSize)
        return false;

-    INFO_LOG("OP detected: Cmd 0x%x  size %i  buffer %i",Cmd, iCommandSize, iBufferSize);
+    // INFO_LOG("OP detected: Cmd 0x%x  size %i  buffer %i",Cmd, iCommandSize, iBufferSize);

    return true;
 }
@ -268,7 +271,7 @@ void Decode(void)
        {			
            // load vertices
            u16 numVertices = g_pDataReader->Read16();		
-            if( numVertices > 0 ) {
+            if (numVertices > 0) {
                g_VertexLoaders[Cmd & GX_VAT_MASK].RunVertices((Cmd & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices);
            }
        }