Improve accuracy of FPU emulation slightly - still no F-Zero improvements :(

Generic code cleanup. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3458 8ced0084-cf51-0410-be5f-012b33b47a6e
2025-07-25 07:09:48 -06:00 · 2009-06-15 21:10:11 +00:00
parent dcae21f692
commit 4dba267775
16 changed files with 355 additions and 383 deletions
--- a/Source/Core/Common/Src/MathUtil.cpp
+++ b/Source/Core/Common/Src/MathUtil.cpp
@ -32,7 +32,7 @@ static const u32 default_sse_state = _mm_getcsr();
 namespace MathUtil
 {

-int ClassifyFP(double dvalue)
+int ClassifyDouble(double dvalue)
 {
 	// TODO: Optimize the below to be as fast as possible.
 	IntDouble value;
@ -79,6 +79,53 @@ int ClassifyFP(double dvalue)
 	return 0x4;
 }

+int ClassifyFloat(float fvalue)
+{
+	// Maps a single-precision value onto the 5-bit PowerPC FPRF code
+	// (C, <, >, =, ?) by inspecting its raw bit pattern.
+	// TODO: Optimize the below to be as fast as possible.
+	IntFloat bits;
+	bits.f = fvalue;
+
+	// Exact bit patterns first: the signed zeros and the signed infinities.
+	if (bits.i == 0)
+		return 0x2;   // positive zero
+	if (bits.i == 0x80000000)
+		return 0x12;  // negative zero
+	if (bits.i == 0x7F800000)
+		return 0x5;   // positive inf
+	if (bits.i == 0xFF800000)
+		return 0x9;   // negative inf
+
+	// Everything else is decided by the sign, exponent and mantissa fields.
+	const bool negative = (bits.i >> 31) != 0;
+	const int exponent = (int)((bits.i >> 23) & 0xFF);
+	if (exponent >= 1 && exponent <= 254)
+		return negative ? 0x8 : 0x4;  // nice normalized number
+
+	const u64 fraction = bits.i & 0x007FFFFF;
+	if (fraction != 0)
+	{
+		if (exponent == 0)
+			return negative ? 0x18 : 0x14;  // denormalized number
+		if (exponent == 0xFF)
+			return 0x11;  // NaN, always reported as quiet NaN
+	}
+
+	// Fallback, same value as a positive normalized number.
+	return 0x4;
+}
+
 }  // namespace

 void LoadDefaultSSEState()
--- a/Source/Core/Common/Src/MathUtil.h
+++ b/Source/Core/Common/Src/MathUtil.h
@ -98,12 +98,9 @@ enum PPCFpClass

 // Uses PowerPC conventions for the return value, so it can be easily
 // used directly in CPU emulation.
-int ClassifyFP(double dvalue);
-
-// TODO: More efficient float version.
-inline int ClassifyFP(float fvalue) {
-	ClassifyFP((double)fvalue);
-}
+int ClassifyDouble(double dvalue);
+// More efficient float version.
+int ClassifyFloat(float fvalue);

 }  // namespace MathUtil

--- a/Source/Core/Core/Src/Core.cpp
+++ b/Source/Core/Core/Src/Core.cpp
@ -745,16 +745,16 @@ void Callback_VideoCopiedToXFB()
 		*/
 		
 		/**/
-		if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0/3.0 + 1.0/2.0)/2)) FPS_To_VPS_Rate = 1.0/3.0;
-		else if (FPS_To_VPS_Rate > ((1.0/3.0 + 1.0/2.0)/2) && FPS_To_VPS_Rate < ((1.0/2.0 + 1.0/1.0)/2)) FPS_To_VPS_Rate = 1.0/2.0;
+		if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0f/3.0f + 1.0f/2.0f)/2)) FPS_To_VPS_Rate = 1.0f/3.0f;
+		else if (FPS_To_VPS_Rate > ((1.0f/3.0f + 1.0f/2.0f)/2) && FPS_To_VPS_Rate < ((1.0f/2.0f + 1.0f/1.0f)/2)) FPS_To_VPS_Rate = 1.0/2.0;
 		else FPS_To_VPS_Rate = 1.0;	
 		// PAL patch adjustment
-		if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2;
+		if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2f;
 		
 		
 		float TargetFPS = FPS_To_VPS_Rate * (float)VideoInterface::TargetRefreshRate;
-		float FPSPercentage = (FPS / TargetFPS) * 100.0;
-		float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0;
+		float FPSPercentage = (FPS / TargetFPS) * 100.0f;
+		float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0f;
 		
 		// Settings are shown the same for both extended and summary info
 		std::string SSettings = StringFromFormat(" | Core: %s %s",
--- a/Source/Core/Core/Src/HW/VideoInterface.cpp
+++ b/Source/Core/Core/Src/HW/VideoInterface.cpp
@ -338,7 +338,9 @@ static u32 LineCount = 0;
 static u32 LinesPerField = 0;
 static u64 LastTime = 0;
 static u32 NextXFBRender = 0;
-int TargetRefreshRate = 0, SyncTicksProgress = 0; float ActualRefreshRate = 0.0;
+int TargetRefreshRate = 0;
+s64 SyncTicksProgress = 0;
+float ActualRefreshRate = 0.0;

 void DoState(PointerWrap &p)
 {
@ -1042,23 +1044,24 @@ void UpdateTiming()
 // Run when: This is run 7200 times per second on full speed
 void Update()
 {
-	
 	// Update the target refresh rate
 	TargetRefreshRate = (m_DisplayControlRegister.FMT == 0 || m_DisplayControlRegister.FMT == 2)
 		? 60 : 50;

 	// Calculate actual refresh rate
 	static u64 LastTick = 0;
-	static int UpdateCheck = timeGetTime() + 1000, TickProgress = 0;
+	static s64 UpdateCheck = timeGetTime() + 1000, TickProgress = 0;
 	if (UpdateCheck < (int)timeGetTime())
 	{
 		UpdateCheck = timeGetTime() + 1000;
 		TickProgress = CoreTiming::GetTicks() - LastTick;
 		// Calculated CPU-GPU synced ticks for the dual core mode too
-		NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str());
+		// NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str());
 		SyncTicksProgress += TickProgress;
 		// Multipled by two because of the way TicksPerFrame is calculated (divided by 25 and 30
 		// rather than 50 and 60)
+
+		// TODO : Feed the FPS estimate into Iulius' framelimiter.
 		ActualRefreshRate = ((float)SyncTicksProgress / (float)TicksPerFrame) * 2.0;		
 		LastTick = CoreTiming::GetTicks();
 		SyncTicksProgress = 0;
--- a/Source/Core/Core/Src/HW/VideoInterface.h
+++ b/Source/Core/Core/Src/HW/VideoInterface.h
@ -52,7 +52,11 @@ namespace VideoInterface

    // Update and draw framebuffer(s)
    void Update();
-	extern float ActualRefreshRate; extern int TargetRefreshRate, SyncTicksProgress;
+
+	// urgh, ugly externs.
+	extern float ActualRefreshRate;
+	extern int TargetRefreshRate;
+	extern s64 SyncTicksProgress;

 	// UpdateInterrupts: check if we have to generate a new VI Interrupt
 	void UpdateInterrupts();
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h
@ -311,7 +311,6 @@ namespace Interpreter

 	// other helper
 	u32 Helper_Mask(int mb, int me);
-	inline bool IsNAN(double _dValue);

 	extern _interpreterInstruction m_opTable[64];
 	extern _interpreterInstruction m_opTable4[1024];
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@ -34,36 +34,16 @@

 #include "../../Core.h"
 #include "Interpreter.h"
+#include "MathUtil.h"

-// SUPER MONKEY BALL IS BEING A ROYAL PAIN
-// We are missing the caller of 800070ec
+// F-ZERO IS BEING A ROYAL PAIN
 // POSSIBLE APPROACHES:
 // * Full SW FPU. Urgh.
-// * Partial SW FPU, emulate just as much as necessary for monkey ball. Feasible but a lot of work.
-// * HLE hacking. Figure out what all the evil functions really do and fake them. DONE (well, works okay-ish)
+// * Partial SW FPU, emulate just as much as necessary for f-zero. Feasible, I guess.
+// * HLE hacking. Figure out what all the evil functions really do and fake them.
+//   This worked well for Monkey Ball, not so much for F-Zero.

-// Interesting places in Super Monkey Ball:
-// 80036654: fctwixz stuff
-// 80007e08:
-//	-98: Various entry points that loads various odd fp values into f1
-// 800070b0: Estimate inverse square root.
-// 800070ec: Examine f1. Reads a value out of locked cache into f2 (fixed address). Some cases causes us to call the above thing.
-//           If all goes well, jump to 70b0, which estimates the inverse square root. 
-//           Then multiply the loaded variable with the original value of f1. Result should be the square root. (1 / sqrt(x)) * x  = x / sqrt(x) = sqrt(x)
-// 8000712c: Similar, but does not do the multiply at the end, just an frspx.
-// 8000716c: Sort of similar, but has extra junk at the end.
-//
-// 
-// 800072a4 - nightmare of nightmares
-// Fun stuff used:
-// bso+
-// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly (!!!!)
-// crclr
-// crset
-// crxor
-// fnabs
-// Super Monkey Ball reads FPRF & friends after fmadds, fmuls, frspx
-// WHY do the FR & FI flags affect it so much?
+using namespace MathUtil;

 namespace Interpreter
 {
@ -71,112 +51,68 @@ namespace Interpreter
 void UpdateFPSCR(UReg_FPSCR fp);
 void UpdateSSEState();

-
-// start of unit test - Dolphin needs more of these!
-/*
-void TestFPRF()
-{
-	UpdateFPRF(1.0);
-	if (FPSCR.FPRF != 0x4)
-		PanicAlert("Error 1");
-	UpdateFPRF(-1.0);
-	if (FPSCR.FPRF != 0x8)
-		PanicAlert("Error 2");
-	PanicAlert("Test done");
-}*/
-
-
-// extremely rare
+// Extremely rare - actually, never seen.
 void Helper_UpdateCR1(double _fValue)
 {
 	// Should just update exception flags, not do any compares.
 	PanicAlert("CR1");
 }

-inline bool IsNAN(double _dValue) 
-{ 
-	return _dValue != _dValue; 
-}
-
-inline bool _IsNAN(float x) {
-	//return ((*(u32*)&x) & 0x7f800000UL) == 0x7f800000UL && ((*(u32*)&x) & 0x007fffffUL);
-	return x != x;
-}
-
 void fcmpo(UGeckoInstruction _inst)
 {
-	/*
-	float fa = static_cast<float>(rPS0(_inst.FA));
-	float fb = static_cast<float>(rPS0(_inst.FB));
-	// normalize
-	if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL;
-	if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL;
-	*/
+	// Ordered compare of PS0(FA) against PS0(FB). The 4-bit result
+	// (8 = less, 4 = greater, 2 = equal, 1 = unordered/NaN) is written to
+	// both FPSCR.FPRF and CR field CRFD.
+	//
+	// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
+	// the real problem should be fixed instead.
+	double fa = rPS0(_inst.FA);
+	double fb = rPS0(_inst.FB);

-	// normalize if conversion to float gives denormalized number
-	if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FA) &= 0x8000000000000000ULL;
-	if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FB) &= 0x8000000000000000ULL;
-	double fa =	rPS0(_inst.FA);
-	double fb =	rPS0(_inst.FB);
-
-	u32 compareResult;
-	if (IsNAN(fa) || IsNAN(fb))  compareResult = 1;
-	else if (fa < fb)            compareResult = 8; 
-	else if (fa > fb)            compareResult = 4; 
-	else                         compareResult = 2;
+	int compareResult;
+	if (IsNAN(fa) || IsNAN(fb))
+	{
+		FPSCR.FX = 1;
+		compareResult = 1;
+		if (IsSNAN(fa) || IsSNAN(fb))
+		{
+			// Signalling NaN: VXSNAN is always raised; VXVC is raised too,
+			// but only while the invalid-operation exception is disabled
+			// (VE = 0).
+			FPSCR.VXSNAN = 1;
+			if (!FPSCR.VE)
+				FPSCR.VXVC = 1;
+		}
+		else if (IsQNAN(fa) || IsQNAN(fb))
+		{
+			// Quiet NaN without any SNaN: an ordered compare still flags
+			// an invalid compare.
+			FPSCR.VXVC = 1;
+		}
+	}
+	else if (fa < fb)           compareResult = 8;
+	else if (fa > fb)           compareResult = 4;
+	else                        compareResult = 2;

 	FPSCR.FPRF = compareResult;
 	SetCRField(_inst.CRFD, compareResult);
-
-/* missing part
-	if ((frA) is an SNaN or (frB) is an SNaN )
-		then VXSNAN <20> 1
-		if VE = 0
-			then VXVC <20> 1
-		else if ((frA) is a QNaN or (frB) is a QNaN )
-		then VXVC <20> 1 */
 }

 void fcmpu(UGeckoInstruction _inst)
 {
-	
+	// Unordered compare of PS0(FA) against PS0(FB). The 4-bit result
+	// (8 = less, 4 = greater, 2 = equal, 1 = unordered/NaN) is written to
+	// both FPSCR.FPRF and CR field CRFD.
+	//
+	// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
+	// the real problem should be fixed instead.
+	double fa = rPS0(_inst.FA);
+	double fb = rPS0(_inst.FB);

-	/*
-	float fa = static_cast<float>(rPS0(_inst.FA));
-	float fb = static_cast<float>(rPS0(_inst.FB));
-	// normalize
-	if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL;
-	if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL;
-	*/
-
-	// normalize if conversion to float gives denormalized number
-	if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FA) &= 0x8000000000000000ULL;
-	if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FB) &= 0x8000000000000000ULL;
-	double fa =	rPS0(_inst.FA);
-	double fb =	rPS0(_inst.FB);
-
-	u32 compareResult;
-	if (IsNAN(fa) || IsNAN(fb))  compareResult = 1; 
+	int compareResult;
+	if (IsNAN(fa) || IsNAN(fb))
+	{
+		compareResult = 1;
+		if (IsSNAN(fa) || IsSNAN(fb))
+		{
+			// Only a signalling NaN raises an exception bit in an unordered
+			// compare, so the FX summary bit is set here and not for a
+			// quiet NaN operand.
+			FPSCR.FX = 1;
+			FPSCR.VXSNAN = 1;
+		}
+	}
 	else if (fa < fb)            compareResult = 8; 
 	else if (fa > fb)            compareResult = 4; 
 	else                         compareResult = 2;

 	FPSCR.FPRF = compareResult;
 	SetCRField(_inst.CRFD, compareResult);
-
-/* missing part
-	if ((frA) is an SNaN or (frB) is an SNaN)
-		then VXSNAN <20> 1 */
 }

 // Apply current rounding mode
 void fctiwx(UGeckoInstruction _inst)
 {
-	//UpdateSSEState();
 	const double b = rPS0(_inst.FB);
 	u32 value;
 	if (b > (double)0x7fffffff)
@ -215,7 +151,6 @@ largest representable int on PowerPC. */
 // Always round toward zero
 void fctiwzx(UGeckoInstruction _inst)
 {
-	//UpdateSSEState();
 	const double b = rPS0(_inst.FB);
 	u32 value;
 	if (b > (double)0x7fffffff)
@ -282,76 +217,14 @@ void fselx(UGeckoInstruction _inst)
 // !!! warning !!!
 // PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
 // PS1 is said to be undefined
-// Super Monkey Ball is using this to do wacky tricks so we need 100% correct emulation.
 void frspx(UGeckoInstruction _inst)  // round to single
 {
-	if (true || FPSCR.RN != 0)
-	{
-		// Not used in Super Monkey Ball
-		// UpdateSSEState();
-		double b = rPS0(_inst.FB);
-		double rounded = (double)(float)b;
-		//FPSCR.FI = b != rounded;  // changing both of these affect Super Monkey Ball behaviour greatly.
-		if (Core::g_CoreStartupParameter.bEnableFPRF)
-			UpdateFPRF(rounded);
-		rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
-		return;
-		// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
-	}
-
-	// OK, let's try it in 100% software! Not yet working right.
-	union {
-		double d;
-		u64 i;
-	} in, out;
-	in.d = rPS0(_inst.FB);
-	out = in;
-	int sign = (int)(in.i >> 63);
-	int exp = (int)((in.i >> 52) & 0x7FF);
-	u64 mantissa = in.i & 0x000FFFFFFFFFFFFFULL;
-	u64 mantissa_single = mantissa & 0x000FFFFFE0000000ULL;
-	u64 leftover_single = mantissa & 0x000000001FFFFFFFULL;
-
-	// OK. First make sure that we have a "normal" number.
-	if (exp >= 1 && exp <= 2046) {
-		// OK. Check for overflow. TODO
-
-		FPSCR.FI = leftover_single != 0; // Inexact
-		if (leftover_single >= 0x10000000ULL) {
-			//PanicAlert("rounding up");
-			FPSCR.FR = 1;
-			mantissa_single += 0x20000000;
-			if (mantissa_single & 0x0010000000000000ULL) {
-				// PanicAlert("renormalizing");
-				mantissa_single >>= 1;
-				exp += 1;
-				// if (exp > 2046) { OVERFLOW }
-			}
-		}
-		out.i = ((u64)sign << 63) | ((u64)exp << 52) | mantissa_single;
-	} else {
-		if (!exp && !mantissa) {
-			// Positive or negative Zero. All is well.
-			FPSCR.FI = 0;
-			FPSCR.FR = 0;
-		} else if (exp == 0 && mantissa) {
-			// Denormalized number.
-			PanicAlert("denorm");
-		} else if (exp == 2047 && !mantissa) {
-			// Infinite.
-			//PanicAlert("infinite");
-			FPSCR.FI = 1;
-			FPSCR.FR = 1;
-//			FPSCR.OX = 1;
-		} else {
-			//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
-		}
-	}
-
-	UpdateFPRF(out.d);
-	rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+	double b = rPS0(_inst.FB);
+	double rounded = (double)(float)b;
+	//FPSCR.FI = b != rounded;
+	UpdateFPRF(rounded);
+	rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
+	return;
 }


@ -394,11 +267,13 @@ void fmaddsx(UGeckoInstruction _inst)
 void faddx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void faddsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -407,51 +282,79 @@ void fdivx(UGeckoInstruction _inst)
 {
 	double a = rPS0(_inst.FA);
 	double b = rPS0(_inst.FB);
-	if (a == 0.0f && b == 0.0f)
-		rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0;  // NAN?
-	else
-		rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
-	if (fabs(rPS0(_inst.FB)) == 0.0) {
-		if (!FPSCR.ZX)
-			FPSCR.FX = 1;
-		FPSCR.ZX = 1;
-		FPSCR.XX = 1;
-	}
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-void fdivsx(UGeckoInstruction _inst)
-{
-	float a = rPS0(_inst.FA);
-	float b = rPS0(_inst.FB);
-	if (a != a || b != b)
-		rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0;  // NAN?
-	else
-		rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
+	rPS0(_inst.FD) = a / b;
 	if (b == 0.0) {
 		if (!FPSCR.ZX)
 			FPSCR.FX = 1;
 		FPSCR.ZX = 1;
 		FPSCR.XX = 1;
 	}
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));  
+ 	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
-void fresx(UGeckoInstruction _inst)
+void fdivsx(UGeckoInstruction _inst)
 {
-	double b = rPS0(_inst.FB);
-	rPS0(_inst.FD) = rPS1(_inst.FD) = 1.0 / b;
-	if (fabs(rPS0(_inst.FB)) == 0.0) {
+	float a = (float)rPS0(_inst.FA);
+	float b = (float)rPS0(_inst.FB);
+	rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
+	if (b == 0.0)
+	{
 		if (!FPSCR.ZX)
 			FPSCR.FX = 1;
 		FPSCR.ZX = 1;
 		FPSCR.XX = 1;
 	}
+ 	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));  
+}
+
+// Single precision only.
+void fresx(UGeckoInstruction _inst)
+{
+	float b = (float)rPS0(_inst.FB);
+	float one_over = 1.0f / b;
+	rPS0(_inst.FD) = rPS1(_inst.FD) = one_over;
+	if (b == 0.0)
+	{
+		if (!FPSCR.ZX)
+			FPSCR.FX = 1;
+		FPSCR.ZX = 1;
+		FPSCR.XX = 1;
+	}
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

+void frsqrtex(UGeckoInstruction _inst)
+{
+	// Reciprocal square root, computed in single precision from PS0(FB).
+	// The result is stored in PS0(FD) and FPRF is updated from it.
+	float b = (float)rPS0(_inst.FB);
+	if (b < 0.0) {
+		// FX is set only when the exception bit goes from 0 to 1, the same
+		// way the divide handlers in this file treat ZX.
+		if (!FPSCR.VXSQRT)
+			FPSCR.FX = 1;
+		FPSCR.VXSQRT = 1;
+	} else if (b == 0) {
+		if (!FPSCR.ZX)
+			FPSCR.FX = 1;
+		FPSCR.ZX = 1;
+	}
+	rPS0(_inst.FD) = 1.0f / sqrtf(b);
+	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void fsqrtx(UGeckoInstruction _inst)
+{
+	// Double-precision square root of PS0(FB), stored in PS0(FD); FPRF is
+	// updated from the result.
+	// GEKKO is not supposed to support this instruction.
+	// PanicAlert("fsqrtx");
+	double b = rPS0(_inst.FB);
+	if (b < 0.0) {
+		// FX is set only when the exception bit goes from 0 to 1, the same
+		// way the divide handlers in this file treat ZX.
+		if (!FPSCR.VXSQRT)
+			FPSCR.FX = 1;
+		FPSCR.VXSQRT = 1;
+	}
+	rPS0(_inst.FD) = sqrt(b);
+	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}

 void fmsubx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -459,6 +362,7 @@ void fmsubsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) =
 		static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -466,12 +370,14 @@ void fmsubsx(UGeckoInstruction _inst)
 void fnmaddx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void fnmaddsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = 
 		static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -479,12 +385,14 @@ void fnmaddsx(UGeckoInstruction _inst)
 void fnmsubx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void fnmsubsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = 
 		static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -492,32 +400,13 @@ void fnmsubsx(UGeckoInstruction _inst)
 void fsubx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void fsubsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void frsqrtex(UGeckoInstruction _inst)
-{
-	double b = rPS0(_inst.FB);
-	if (b <= 0.0)
-		rPS0(_inst.FD) = 0.0;
-	else
-		rPS0(_inst.FD) = 1.0f / (sqrt(b));
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void fsqrtx(UGeckoInstruction _inst)
-{
-	double b = rPS0(_inst.FB);
-	if (b < 0.0)
-	{
-		FPSCR.VXSQRT = 1;
-	}
-	rPS0(_inst.FD) = sqrt(b);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp
@ -493,7 +493,7 @@ void divwux(UGeckoInstruction _inst)
 	u32 a = m_GPR[_inst.RA];
 	u32 b = m_GPR[_inst.RB];

-	if (b == 0 || (a == 0x80000000 && b == 0xFFFFFFFF))
+	if (b == 0) // || (a == 0x80000000 && b == 0xFFFFFFFF))
 	{
 		if (_inst.OE) 
 			PanicAlert("OE: divwux");
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@ -15,6 +15,9 @@
 // Official SVN repository and contact information can be found at
 // http://code.google.com/p/dolphin-emu/

+#include "Common.h"
+#include "MathUtil.h"
+
 #include "../../HW/Memmap.h"
 #include "../../HW/CommandProcessor.h"
 #include "../../HW/PixelEngine.h"
@ -92,16 +95,18 @@ void lfdx(UGeckoInstruction _inst)
 void lfs(UGeckoInstruction _inst)
 {
 	u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 }

 void lfsu(UGeckoInstruction _inst)
 {
 	u32 uAddress = Helper_Get_EA_U(_inst);
 	u32 uTemp = Memory::Read_U32(uAddress);
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 	m_GPR[_inst.RA] = uAddress;
 }

@ -109,16 +114,18 @@ void lfsux(UGeckoInstruction _inst)
 {
 	u32 uAddress = Helper_Get_EA_UX(_inst);
 	u32 uTemp = Memory::Read_U32(uAddress);
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 	m_GPR[_inst.RA] = uAddress;
 }

 void lfsx(UGeckoInstruction _inst)
 {
 	u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst));
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 }

 void lha(UGeckoInstruction _inst)
@ -227,7 +234,8 @@ void stfdu(UGeckoInstruction _inst)

 void stfs(UGeckoInstruction _inst)
 {
-	float fTemp = (float)rPS0(_inst.FS);
+	double value = rPS0(_inst.FS);
+	float fTemp = (float)value;
 	Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst));
 }

@ -453,27 +461,20 @@ void stfiwx(UGeckoInstruction _inst)
 	Memory::Write_U32((u32)riPS0(_inst.FS), uAddress);
 }

-// __________________________________________________________________________________________________
-// stfsux
-//
-// no paired ??
-//
+
 void stfsux(UGeckoInstruction _inst)
 {
-	float fTemp = (float)rPS0(_inst.FS);
+	double value = rPS0(_inst.FS);
+	float fTemp = (float)value;
 	u32 uAddress = Helper_Get_EA_UX(_inst);
 	Memory::Write_U32(*(u32*)&fTemp, uAddress);
 	m_GPR[_inst.RA] = uAddress;
 }

-// __________________________________________________________________________________________________
-// stfsx
-//
-// no paired ??
-//
 void stfsx(UGeckoInstruction _inst)
 {
-	float fTemp = (float)rPS0(_inst.FS);
+	double value = rPS0(_inst.FS);
+	float fTemp = (float)value;
 	Memory::Write_U32(*(u32 *)&fTemp, Helper_Get_EA_X(_inst));
 }

--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp
@ -16,41 +16,52 @@
 // http://code.google.com/p/dolphin-emu/

 #include <math.h>
+#include "Common.h"
+#include "MathUtil.h"
 #include "Interpreter.h"
 #include "../../HW/Memmap.h"

+using namespace MathUtil;
+
 namespace Interpreter
 {

 // These "binary instructions" do not alter FPSCR.
 void ps_sel(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) >= -0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB));
-	rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) >= -0.0) ? rPS1(_inst.FC) : rPS1(_inst.FB));
+	rPS0(_inst.FD) = !IsNAN(rPS0(_inst.FA)) && rPS0(_inst.FA) >= -0.0 ?
+		              rPS0(_inst.FC) : rPS0(_inst.FB);
+	rPS1(_inst.FD) = !IsNAN(rPS1(_inst.FA)) && rPS1(_inst.FA) >= -0.0 ?
+		              rPS1(_inst.FC) : rPS1(_inst.FB);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_neg(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) ^ (1ULL << 63);
 	riPS1(_inst.FD) = riPS1(_inst.FB) ^ (1ULL << 63);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_mr(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS0(_inst.FB);
 	rPS1(_inst.FD) = rPS1(_inst.FB);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_nabs(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63); 
 	riPS1(_inst.FD) = riPS1(_inst.FB) | (1ULL << 63); 
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_abs(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) &~ (1ULL << 63); 
 	riPS1(_inst.FD) = riPS1(_inst.FB) &~ (1ULL << 63); 
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 // These are just moves, double is OK.
@ -60,6 +71,7 @@ void ps_merge00(UGeckoInstruction _inst)
 	double p1 = rPS0(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_merge01(UGeckoInstruction _inst)
@ -68,6 +80,7 @@ void ps_merge01(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_merge10(UGeckoInstruction _inst)
@ -76,6 +89,7 @@ void ps_merge10(UGeckoInstruction _inst)
 	double p1 = rPS0(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_merge11(UGeckoInstruction _inst)
@ -84,6 +98,7 @@ void ps_merge11(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }


@ -97,63 +112,75 @@ void ps_div(UGeckoInstruction _inst)
 	if (fabs(rPS0(_inst.FB)) == 0.0) {
 		FPSCR.ZX = 1;
 	}
-}
-
-void ps_sub(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
-	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
-}
-
-void ps_add(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
-	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_res(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = 1.0f / static_cast<float>(rPS0(_inst.FB));
 	rPS1(_inst.FD) = 1.0f / static_cast<float>(rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void ps_rsqrte(UGeckoInstruction _inst)
+{
+	// PanicAlert("ps_rsqrte");
+	rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB)));
+	rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
+	if (fabs(rPS0(_inst.FB)) == 0.0) {
+		FPSCR.ZX = 1;
+	}
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void ps_sub(UGeckoInstruction _inst)
+{
+	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
+	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void ps_add(UGeckoInstruction _inst)
+{
+	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
+	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_mul(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
 	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) * rPS1(_inst.FC));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

-void ps_rsqrte(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB)));
-	rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
-	if (fabs(rPS0(_inst.FB)) == 0.0) {
-		FPSCR.ZX = 1;
-	}
-}

 void ps_msub(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
 	rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_madd(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
 	rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_nmsub(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB)));
 	rPS1(_inst.FD) = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB)));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_nmadd(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB)));
 	rPS1(_inst.FD) = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB)));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_sum0(UGeckoInstruction _inst)
@ -162,6 +189,7 @@ void ps_sum0(UGeckoInstruction _inst)
 	double p1 = (float)(rPS1(_inst.FC));
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_sum1(UGeckoInstruction _inst)
@ -170,6 +198,7 @@ void ps_sum1(UGeckoInstruction _inst)
 	double p1 = rPS0(_inst.FA) + rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_muls0(UGeckoInstruction _inst)
@ -178,6 +207,7 @@ void ps_muls0(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FA) * rPS0(_inst.FC);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_muls1(UGeckoInstruction _inst)
@ -186,6 +216,7 @@ void ps_muls1(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FA) * rPS1(_inst.FC);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_madds0(UGeckoInstruction _inst)
@ -194,6 +225,7 @@ void ps_madds0(UGeckoInstruction _inst)
 	double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_madds1(UGeckoInstruction _inst)
@ -202,6 +234,7 @@ void ps_madds1(UGeckoInstruction _inst)
 	double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_cmpu0(UGeckoInstruction _inst)
@ -209,10 +242,12 @@ void ps_cmpu0(UGeckoInstruction _inst)
 	double fa = rPS0(_inst.FA);
 	double fb = rPS0(_inst.FB);
 	int compareResult;
-	if (fa < fb)		compareResult = 8; 
-	else if (fa > fb) 	compareResult = 4; 
-	else				compareResult = 2;
+	if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
+	else if (fa < fb)         	compareResult = 8; 
+	else if (fa > fb)        	compareResult = 4; 
+	else			        	compareResult = 2;
 	SetCRField(_inst.CRFD, compareResult);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_cmpo0(UGeckoInstruction _inst)
@ -226,10 +261,12 @@ void ps_cmpu1(UGeckoInstruction _inst)
 	double fa = rPS1(_inst.FA);
 	double fb = rPS1(_inst.FB);
 	int compareResult;
-	if (fa < fb)		compareResult = 8; 
-	else if (fa > fb)	compareResult = 4; 
-	else				compareResult = 2;
+	if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
+	else if (fa < fb)         	compareResult = 8; 
+	else if (fa > fb)        	compareResult = 4; 
+	else			        	compareResult = 2;
 	SetCRField(_inst.CRFD, compareResult);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_cmpo1(UGeckoInstruction _inst)
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@ -35,6 +35,7 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
 #include <xmmintrin.h>
 #endif

+#include "CPUDetect.h"
 #include "../../CoreTiming.h"
 #include "../../HW/Memmap.h"
 #include "../../HW/GPFifo.h"
@ -60,37 +61,11 @@ mffsx: 80036650 (huh?)
 namespace Interpreter
 {

-void UpdateSSEState()
-{
-	u32 csr = _mm_getcsr();
-	
-	const int ssetable[4] = 
-	{
-		0,
-		3,
-		2,
-		1,
-	};
-	csr = csr & 0x9FFF;
-	csr |= ssetable[FPSCR.RN] << 13;
+const u32 MASKS = 0x1F80;  // mask away the interrupts.
+const u32 DAZ = 0x40;
+const u32 FTZ = 0x8000;

-	// Also handle denormals as zero (FZ + DAZ)
-	csr &= ~0x8020;
-
-	// SETTING FTZ+DAZ KILLS BEYOND GOOD AND EVIL
-	//if (daz)
-	//	csr |= 0x20; // Only set DAZ  //0x8020;
-	
-	_mm_setcsr(csr);
-}
-
-void RestoreSSEState()
-{
-	// A reasonable default
-	_mm_setcsr(0x1fa0);
-}
-
-void UpdateFPSCR(UReg_FPSCR fp)
+void FPSCRtoFPUSettings(UReg_FPSCR fp)
 {
 	// Set FPU rounding mode to mimic the PowerPC's
 #ifdef _M_IX86
@ -120,12 +95,28 @@ void UpdateFPSCR(UReg_FPSCR fp)
 #endif
 	if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
 	{
-		// PanicAlert("FPSCR - exceptions enabled. Please report.");
+		//PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
+		//	fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
 		// Pokemon Colosseum does this. Gah.
 	}

 	// Also corresponding SSE rounding mode setting
-	UpdateSSEState();
+	static const u32 ssetable[4] = 
+	{
+		(0 << 13) | MASKS,
+		(3 << 13) | MASKS,
+		(2 << 13) | MASKS,
+		(1 << 13) | MASKS,
+	};
+	u32 csr = ssetable[FPSCR.RN];
+	if (FPSCR.NI)
+	{
+		// Either one of these two breaks Beyond Good & Evil.
+		// if (cpu_info.bSSSE3)
+		//     csr |= DAZ;
+		// csr |= FTZ;
+	}
+	_mm_setcsr(csr);
 }

 void mcrfs(UGeckoInstruction _inst)
@ -158,25 +149,9 @@ void mcrfs(UGeckoInstruction _inst)
 		break;
 	}
 	SetCRField(_inst.CRFD, fpflags);
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 }

-#define MXCSR_IE 1
-#define MXCSR_DE 2  // denormal
-#define MXCSR_ZE 4  // divide by zero, sticky
-#define MXCSR_OE 8  // overflow
-#define MXCSR_UE 16 // underflow
-#define MXCSR_PE 32 // precision
-#define MXCSR_DAZ 64
-#define MXCSR_IM 128
-#define MXCSR_DM 256
-#define MXCSR_ZM 512
-#define MXCSR_OM 1024
-#define MXCSR_UM 2048
-#define MXCSR_PM 4096
-#define MXCSR_ROUND (16384|8192)
-#define MXCSR_FLUSH 32768
-
 void mffsx(UGeckoInstruction _inst)
 {
 	// load from FPSCR
@ -190,14 +165,14 @@ void mffsx(UGeckoInstruction _inst)
 void mtfsb0x(UGeckoInstruction _inst)
 {
 	FPSCR.Hex &= (~(0x80000000 >> _inst.CRBD));
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsb0x: inst_.Rc");
 }

 void mtfsb1x(UGeckoInstruction _inst)
 {
 	FPSCR.Hex |= 0x80000000 >> _inst.CRBD;
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsb1x: inst_.Rc");
 }

@ -206,7 +181,7 @@ void mtfsfix(UGeckoInstruction _inst)
 	u32 mask = (0xF0000000 >> (4 * _inst.CRFD));
 	u32 imm = (_inst.hex << 16) & 0xF0000000;
 	FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD));
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc");
 }

@ -214,13 +189,14 @@ void mtfsfx(UGeckoInstruction _inst)
 {
 	u32 fm = _inst.FM;
 	u32 m = 0;
-	for (int i = 0; i < 8; i++) {  //7?? todo check
+	for (int i = 0; i < 8; i++)  //7?? todo check
+	{
 		if (fm & (1 << i))
-			m |= (0xF << (i*4));
+			m |= (0xF << (i * 4));
 	}

 	FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc");
 }

--- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp
@ -363,7 +363,7 @@ void OnIdleIL()

 void UpdateFPRF(double dvalue)
 {
-	FPSCR.FPRF = MathUtil::ClassifyFP(dvalue);
+	FPSCR.FPRF = MathUtil::ClassifyDouble(dvalue);
 	//if (FPSCR.FPRF == 0x11)
 	//	PanicAlert("QNAN alert");
 }