Improve accuracy of FPU emulation slightly - still no F-Zero improvements :(

Generic code cleanup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3458 8ced0084-cf51-0410-be5f-012b33b47a6e
2024-09-20 11:21:43 +02:00 · 2009-06-15 21:10:11 +00:00 · 2009-06-15 21:10:11 +00:00 · 4dba267775
commit 4dba267775
parent dcae21f692
16 changed files with 355 additions and 383 deletions
--- a/Source/Core/Common/Src/MathUtil.cpp
+++ b/Source/Core/Common/Src/MathUtil.cpp
@ -32,7 +32,7 @@ static const u32 default_sse_state = _mm_getcsr();
 namespace MathUtil
 {

-int ClassifyFP(double dvalue)
+int ClassifyDouble(double dvalue)
 {
 	// TODO: Optimize the below to be as fast as possible.
 	IntDouble value;
@ -79,6 +79,53 @@ int ClassifyFP(double dvalue)
 	return 0x4;
 }

+int ClassifyFloat(float fvalue)
+{
+	// TODO: Optimize the below to be as fast as possible.
+	IntFloat value;
+	value.f = fvalue;
+	// 5 bits (C, <, >, =, ?)
+	// easy cases first
+	if (value.i == 0) {
+		// positive zero
+		return 0x2;
+	} else if (value.i == 0x80000000) {
+		// negative zero
+	   return 0x12;
+	} else if (value.i == 0x7F800000) {
+		// positive inf
+		return 0x5;
+	} else if (value.i == 0xFF800000) {
+		// negative inf
+		return 0x9;
+	} else {
+		// OK let's dissect this thing.
+		int sign = value.i >> 31;
+		int exp = (int)((value.i >> 23) & 0xFF);
+		if (exp >= 1 && exp <= 254) {
+			// Nice normalized number.
+			if (sign) {
+				return 0x8; // negative
+			} else {
+				return 0x4; // positive
+			}
+		}
+		u64 mantissa = value.i & 0x007FFFFF;
+		if (exp == 0 && mantissa) {
+			// Denormalized number.
+			if (sign) {
+				return 0x18;
+			} else {
+				return 0x14;
+			}
+		} else if (exp == 0xFF && mantissa /* && mantissa_top*/) {
+			return 0x11; // Quiet NAN
+		}
+	}
+	
+	return 0x4;
+}
+
 }  // namespace

 void LoadDefaultSSEState()
--- a/Source/Core/Common/Src/MathUtil.h
+++ b/Source/Core/Common/Src/MathUtil.h
@ -98,12 +98,9 @@ enum PPCFpClass

 // Uses PowerPC conventions for the return value, so it can be easily
 // used directly in CPU emulation.
-int ClassifyFP(double dvalue);
-
-// TODO: More efficient float version.
-inline int ClassifyFP(float fvalue) {
-	ClassifyFP((double)fvalue);
-}
+int ClassifyDouble(double dvalue);
+// More efficient float version.
+int ClassifyFloat(float fvalue);

 }  // namespace MathUtil

--- a/Source/Core/Core/Src/Core.cpp
+++ b/Source/Core/Core/Src/Core.cpp
@ -745,16 +745,16 @@ void Callback_VideoCopiedToXFB()
 		*/
 		
 		/**/
-		if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0/3.0 + 1.0/2.0)/2)) FPS_To_VPS_Rate = 1.0/3.0;
-		else if (FPS_To_VPS_Rate > ((1.0/3.0 + 1.0/2.0)/2) && FPS_To_VPS_Rate < ((1.0/2.0 + 1.0/1.0)/2)) FPS_To_VPS_Rate = 1.0/2.0;
+		if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0f/3.0f + 1.0f/2.0f)/2)) FPS_To_VPS_Rate = 1.0f/3.0f;
+		else if (FPS_To_VPS_Rate > ((1.0f/3.0f + 1.0f/2.0f)/2) && FPS_To_VPS_Rate < ((1.0f/2.0f + 1.0f/1.0f)/2)) FPS_To_VPS_Rate = 1.0/2.0;
 		else FPS_To_VPS_Rate = 1.0;	
 		// PAL patch adjustment
-		if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2;
+		if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2f;
 		
 		
 		float TargetFPS = FPS_To_VPS_Rate * (float)VideoInterface::TargetRefreshRate;
-		float FPSPercentage = (FPS / TargetFPS) * 100.0;
-		float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0;
+		float FPSPercentage = (FPS / TargetFPS) * 100.0f;
+		float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0f;
 		
 		// Settings are shown the same for both extended and summary info
 		std::string SSettings = StringFromFormat(" | Core: %s %s",
--- a/Source/Core/Core/Src/HW/VideoInterface.cpp
+++ b/Source/Core/Core/Src/HW/VideoInterface.cpp
@ -338,7 +338,9 @@ static u32 LineCount = 0;
 static u32 LinesPerField = 0;
 static u64 LastTime = 0;
 static u32 NextXFBRender = 0;
-int TargetRefreshRate = 0, SyncTicksProgress = 0; float ActualRefreshRate = 0.0;
+int TargetRefreshRate = 0;
+s64 SyncTicksProgress = 0;
+float ActualRefreshRate = 0.0;

 void DoState(PointerWrap &p)
 {
@ -1042,23 +1044,24 @@ void UpdateTiming()
 // Run when: This is run 7200 times per second on full speed
 void Update()
 {
-	
 	// Update the target refresh rate
 	TargetRefreshRate = (m_DisplayControlRegister.FMT == 0 || m_DisplayControlRegister.FMT == 2)
 		? 60 : 50;

 	// Calculate actual refresh rate
 	static u64 LastTick = 0;
-	static int UpdateCheck = timeGetTime() + 1000, TickProgress = 0;
+	static s64 UpdateCheck = timeGetTime() + 1000, TickProgress = 0;
 	if (UpdateCheck < (int)timeGetTime())
 	{
 		UpdateCheck = timeGetTime() + 1000;
 		TickProgress = CoreTiming::GetTicks() - LastTick;
 		// Calculated CPU-GPU synced ticks for the dual core mode too
-		NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str());
+		// NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str());
 		SyncTicksProgress += TickProgress;
 		// Multipled by two because of the way TicksPerFrame is calculated (divided by 25 and 30
 		// rather than 50 and 60)
+
+		// TODO : Feed the FPS estimate into Iulius' framelimiter.
 		ActualRefreshRate = ((float)SyncTicksProgress / (float)TicksPerFrame) * 2.0;		
 		LastTick = CoreTiming::GetTicks();
 		SyncTicksProgress = 0;
--- a/Source/Core/Core/Src/HW/VideoInterface.h
+++ b/Source/Core/Core/Src/HW/VideoInterface.h
@ -52,7 +52,11 @@ namespace VideoInterface

    // Update and draw framebuffer(s)
    void Update();
-	extern float ActualRefreshRate; extern int TargetRefreshRate, SyncTicksProgress;
+
+	// urgh, ugly externs.
+	extern float ActualRefreshRate;
+	extern int TargetRefreshRate;
+	extern s64 SyncTicksProgress;

 	// UpdateInterrupts: check if we have to generate a new VI Interrupt
 	void UpdateInterrupts();
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter.h
@ -311,7 +311,6 @@ namespace Interpreter

 	// other helper
 	u32 Helper_Mask(int mb, int me);
-	inline bool IsNAN(double _dValue);

 	extern _interpreterInstruction m_opTable[64];
 	extern _interpreterInstruction m_opTable4[1024];
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@ -34,36 +34,16 @@

 #include "../../Core.h"
 #include "Interpreter.h"
+#include "MathUtil.h"

-// SUPER MONKEY BALL IS BEING A ROYAL PAIN
-// We are missing the caller of 800070ec
+// F-ZERO IS BEING A ROYAL PAIN
 // POSSIBLE APPROACHES:
 // * Full SW FPU. Urgh.
-// * Partial SW FPU, emulate just as much as necessary for monkey ball. Feasible but a lot of work.
-// * HLE hacking. Figure out what all the evil functions really do and fake them. DONE (well, works okay-ish)
+// * Partial SW FPU, emulate just as much as necessary for f-zero. Feasible, I guess.
+// * HLE hacking. Figure out what all the evil functions really do and fake them.
+//   This worked well for Monkey Ball, not so much for F-Zero.

-// Interesting places in Super Monkey Ball:
-// 80036654: fctwixz stuff
-// 80007e08:
-//	-98: Various entry points that loads various odd fp values into f1
-// 800070b0: Estimate inverse square root.
-// 800070ec: Examine f1. Reads a value out of locked cache into f2 (fixed address). Some cases causes us to call the above thing.
-//           If all goes well, jump to 70b0, which estimates the inverse square root. 
-//           Then multiply the loaded variable with the original value of f1. Result should be the square root. (1 / sqrt(x)) * x  = x / sqrt(x) = sqrt(x)
-// 8000712c: Similar, but does not do the multiply at the end, just an frspx.
-// 8000716c: Sort of similar, but has extra junk at the end.
-//
-// 
-// 800072a4 - nightmare of nightmares
-// Fun stuff used:
-// bso+
-// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly (!!!!)
-// crclr
-// crset
-// crxor
-// fnabs
-// Super Monkey Ball reads FPRF & friends after fmadds, fmuls, frspx
-// WHY do the FR & FI flags affect it so much?
+using namespace MathUtil;

 namespace Interpreter
 {
@ -71,112 +51,68 @@ namespace Interpreter
 void UpdateFPSCR(UReg_FPSCR fp);
 void UpdateSSEState();

-
-// start of unit test - Dolphin needs more of these!
-/*
-void TestFPRF()
-{
-	UpdateFPRF(1.0);
-	if (FPSCR.FPRF != 0x4)
-		PanicAlert("Error 1");
-	UpdateFPRF(-1.0);
-	if (FPSCR.FPRF != 0x8)
-		PanicAlert("Error 2");
-	PanicAlert("Test done");
-}*/
-
-
-// extremely rare
+// Extremely rare - actually, never seen.
 void Helper_UpdateCR1(double _fValue)
 {
 	// Should just update exception flags, not do any compares.
 	PanicAlert("CR1");
 }

-inline bool IsNAN(double _dValue) 
-{ 
-	return _dValue != _dValue; 
-}
-
-inline bool _IsNAN(float x) {
-	//return ((*(u32*)&x) & 0x7f800000UL) == 0x7f800000UL && ((*(u32*)&x) & 0x007fffffUL);
-	return x != x;
-}
-
 void fcmpo(UGeckoInstruction _inst)
 {
-	/*
-	float fa = static_cast<float>(rPS0(_inst.FA));
-	float fb = static_cast<float>(rPS0(_inst.FB));
-	// normalize
-	if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL;
-	if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL;
-	*/
+	// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
+	// the real problem should be fixed instead.
+	double fa = rPS0(_inst.FA);
+	double fb = rPS0(_inst.FB);

-	// normalize if conversion to float gives denormalized number
-	if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FA) &= 0x8000000000000000ULL;
-	if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FB) &= 0x8000000000000000ULL;
-	double fa =	rPS0(_inst.FA);
-	double fb =	rPS0(_inst.FB);
-
-	u32 compareResult;
-	if (IsNAN(fa) || IsNAN(fb))  compareResult = 1;
-	else if (fa < fb)            compareResult = 8; 
-	else if (fa > fb)            compareResult = 4; 
-	else                         compareResult = 2;
+	int compareResult;
+	if (IsNAN(fa) || IsNAN(fb)) 
+	{
+		FPSCR.FX = 1;
+		compareResult = 1;
+		if (IsSNAN(fa) || IsSNAN(fb))
+		{
+			FPSCR.VXSNAN = 1;
+			if (!FPSCR.FEX || IsQNAN(fa) || IsQNAN(fb))
+				FPSCR.VXVC = 1;
+		}
+	}
+	else if (fa < fb)           compareResult = 8; 
+	else if (fa > fb)           compareResult = 4; 
+	else                        compareResult = 2;

 	FPSCR.FPRF = compareResult;
 	SetCRField(_inst.CRFD, compareResult);
-
-/* missing part
-	if ((frA) is an SNaN or (frB) is an SNaN )
-		then VXSNAN ¬ 1
-		if VE = 0
-			then VXVC ¬ 1
-		else if ((frA) is a QNaN or (frB) is a QNaN )
-		then VXVC ¬ 1 */
 }

 void fcmpu(UGeckoInstruction _inst)
 {
-	
+	// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
+	// the real problem should be fixed instead.
+	double fa = rPS0(_inst.FA);
+	double fb = rPS0(_inst.FB);

-	/*
-	float fa = static_cast<float>(rPS0(_inst.FA));
-	float fb = static_cast<float>(rPS0(_inst.FB));
-	// normalize
-	if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL;
-	if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL;
-	*/
-
-	// normalize if conversion to float gives denormalized number
-	if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FA) &= 0x8000000000000000ULL;
-	if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
-		riPS0(_inst.FB) &= 0x8000000000000000ULL;
-	double fa =	rPS0(_inst.FA);
-	double fb =	rPS0(_inst.FB);
-
-	u32 compareResult;
-	if (IsNAN(fa) || IsNAN(fb))  compareResult = 1; 
+	int compareResult;
+	if (IsNAN(fa) || IsNAN(fb))
+	{
+		FPSCR.FX = 1;
+		compareResult = 1; 
+		if (IsSNAN(fa) || IsSNAN(fb))
+		{
+			FPSCR.VXSNAN = 1;
+		}
+	}
 	else if (fa < fb)            compareResult = 8; 
 	else if (fa > fb)            compareResult = 4; 
 	else                         compareResult = 2;

 	FPSCR.FPRF = compareResult;
 	SetCRField(_inst.CRFD, compareResult);
-
-/* missing part
-	if ((frA) is an SNaN or (frB) is an SNaN)
-		then VXSNAN ¬ 1 */
 }

 // Apply current rounding mode
 void fctiwx(UGeckoInstruction _inst)
 {
-	//UpdateSSEState();
 	const double b = rPS0(_inst.FB);
 	u32 value;
 	if (b > (double)0x7fffffff)
@ -215,7 +151,6 @@ largest representable int on PowerPC. */
 // Always round toward zero
 void fctiwzx(UGeckoInstruction _inst)
 {
-	//UpdateSSEState();
 	const double b = rPS0(_inst.FB);
 	u32 value;
 	if (b > (double)0x7fffffff)
@ -282,76 +217,14 @@ void fselx(UGeckoInstruction _inst)
 // !!! warning !!!
 // PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
 // PS1 is said to be undefined
-// Super Monkey Ball is using this to do wacky tricks so we need 100% correct emulation.
 void frspx(UGeckoInstruction _inst)  // round to single
 {
-	if (true || FPSCR.RN != 0)
-	{
-		// Not used in Super Monkey Ball
-		// UpdateSSEState();
-		double b = rPS0(_inst.FB);
-		double rounded = (double)(float)b;
-		//FPSCR.FI = b != rounded;  // changing both of these affect Super Monkey Ball behaviour greatly.
-		if (Core::g_CoreStartupParameter.bEnableFPRF)
-			UpdateFPRF(rounded);
-		rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
-		return;
-		// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
-	}
-
-	// OK, let's try it in 100% software! Not yet working right.
-	union {
-		double d;
-		u64 i;
-	} in, out;
-	in.d = rPS0(_inst.FB);
-	out = in;
-	int sign = (int)(in.i >> 63);
-	int exp = (int)((in.i >> 52) & 0x7FF);
-	u64 mantissa = in.i & 0x000FFFFFFFFFFFFFULL;
-	u64 mantissa_single = mantissa & 0x000FFFFFE0000000ULL;
-	u64 leftover_single = mantissa & 0x000000001FFFFFFFULL;
-
-	// OK. First make sure that we have a "normal" number.
-	if (exp >= 1 && exp <= 2046) {
-		// OK. Check for overflow. TODO
-
-		FPSCR.FI = leftover_single != 0; // Inexact
-		if (leftover_single >= 0x10000000ULL) {
-			//PanicAlert("rounding up");
-			FPSCR.FR = 1;
-			mantissa_single += 0x20000000;
-			if (mantissa_single & 0x0010000000000000ULL) {
-				// PanicAlert("renormalizing");
-				mantissa_single >>= 1;
-				exp += 1;
-				// if (exp > 2046) { OVERFLOW }
-			}
-		}
-		out.i = ((u64)sign << 63) | ((u64)exp << 52) | mantissa_single;
-	} else {
-		if (!exp && !mantissa) {
-			// Positive or negative Zero. All is well.
-			FPSCR.FI = 0;
-			FPSCR.FR = 0;
-		} else if (exp == 0 && mantissa) {
-			// Denormalized number.
-			PanicAlert("denorm");
-		} else if (exp == 2047 && !mantissa) {
-			// Infinite.
-			//PanicAlert("infinite");
-			FPSCR.FI = 1;
-			FPSCR.FR = 1;
-//			FPSCR.OX = 1;
-		} else {
-			//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
-		}
-	}
-
-	UpdateFPRF(out.d);
-	rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
-
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+	double b = rPS0(_inst.FB);
+	double rounded = (double)(float)b;
+	//FPSCR.FI = b != rounded;
+	UpdateFPRF(rounded);
+	rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
+	return;
 }


@ -394,11 +267,13 @@ void fmaddsx(UGeckoInstruction _inst)
 void faddx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void faddsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -407,51 +282,79 @@ void fdivx(UGeckoInstruction _inst)
 {
 	double a = rPS0(_inst.FA);
 	double b = rPS0(_inst.FB);
-	if (a == 0.0f && b == 0.0f)
-		rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0;  // NAN?
-	else
-		rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
-	if (fabs(rPS0(_inst.FB)) == 0.0) {
-		if (!FPSCR.ZX)
-			FPSCR.FX = 1;
-		FPSCR.ZX = 1;
-		FPSCR.XX = 1;
-	}
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-void fdivsx(UGeckoInstruction _inst)
-{
-	float a = rPS0(_inst.FA);
-	float b = rPS0(_inst.FB);
-	if (a != a || b != b)
-		rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0;  // NAN?
-	else
-		rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
+	rPS0(_inst.FD) = a / b;
 	if (b == 0.0) {
 		if (!FPSCR.ZX)
 			FPSCR.FX = 1;
 		FPSCR.ZX = 1;
 		FPSCR.XX = 1;
 	}
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));  
+ 	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
-void fresx(UGeckoInstruction _inst)
+void fdivsx(UGeckoInstruction _inst)
 {
-	double b = rPS0(_inst.FB);
-	rPS0(_inst.FD) = rPS1(_inst.FD) = 1.0 / b;
-	if (fabs(rPS0(_inst.FB)) == 0.0) {
+	float a = (float)rPS0(_inst.FA);
+	float b = (float)rPS0(_inst.FB);
+	rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
+	if (b == 0.0)
+	{
 		if (!FPSCR.ZX)
 			FPSCR.FX = 1;
 		FPSCR.ZX = 1;
 		FPSCR.XX = 1;
 	}
+ 	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));  
+}
+
+// Single precision only.
+void fresx(UGeckoInstruction _inst)
+{
+	float b = (float)rPS0(_inst.FB);
+	float one_over = 1.0f / b;
+	rPS0(_inst.FD) = rPS1(_inst.FD) = one_over;
+	if (b == 0.0)
+	{
+		if (!FPSCR.ZX)
+			FPSCR.FX = 1;
+		FPSCR.ZX = 1;
+		FPSCR.XX = 1;
+	}
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

+void frsqrtex(UGeckoInstruction _inst)
+{
+	float b = (float)rPS0(_inst.FB);
+	if (b < 0.0) {
+		FPSCR.VXSQRT = 1;
+	} else if (b == 0) {
+		FPSCR.ZX = 1;
+	}
+	rPS0(_inst.FD) = 1.0f / sqrtf(b);	
+ 	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void fsqrtx(UGeckoInstruction _inst)
+{
+	// GEKKO is not supposed to support this instruction.
+	// PanicAlert("fsqrtx");
+	double b = rPS0(_inst.FB);
+	if (b < 0.0) {
+		FPSCR.VXSQRT = 1;
+	}
+	rPS0(_inst.FD) = sqrt(b);
+ 	UpdateFPRF(rPS0(_inst.FD));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}

 void fmsubx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -459,6 +362,7 @@ void fmsubsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) =
 		static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -466,12 +370,14 @@ void fmsubsx(UGeckoInstruction _inst)
 void fnmaddx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void fnmaddsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = 
 		static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -479,12 +385,14 @@ void fnmaddsx(UGeckoInstruction _inst)
 void fnmsubx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void fnmsubsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = 
 		static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); 
 }

@ -492,32 +400,13 @@ void fnmsubsx(UGeckoInstruction _inst)
 void fsubx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }
 void fsubsx(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void frsqrtex(UGeckoInstruction _inst)
-{
-	double b = rPS0(_inst.FB);
-	if (b <= 0.0)
-		rPS0(_inst.FD) = 0.0;
-	else
-		rPS0(_inst.FD) = 1.0f / (sqrt(b));
-	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
-}
-
-void fsqrtx(UGeckoInstruction _inst)
-{
-	double b = rPS0(_inst.FB);
-	if (b < 0.0)
-	{
-		FPSCR.VXSQRT = 1;
-	}
-	rPS0(_inst.FD) = sqrt(b);
+ 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Integer.cpp
@ -493,7 +493,7 @@ void divwux(UGeckoInstruction _inst)
 	u32 a = m_GPR[_inst.RA];
 	u32 b = m_GPR[_inst.RB];

-	if (b == 0 || (a == 0x80000000 && b == 0xFFFFFFFF))
+	if (b == 0) // || (a == 0x80000000 && b == 0xFFFFFFFF))
 	{
 		if (_inst.OE) 
 			PanicAlert("OE: divwux");
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@ -15,6 +15,9 @@
 // Official SVN repository and contact information can be found at
 // http://code.google.com/p/dolphin-emu/

+#include "Common.h"
+#include "MathUtil.h"
+
 #include "../../HW/Memmap.h"
 #include "../../HW/CommandProcessor.h"
 #include "../../HW/PixelEngine.h"
@ -92,16 +95,18 @@ void lfdx(UGeckoInstruction _inst)
 void lfs(UGeckoInstruction _inst)
 {
 	u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 }

 void lfsu(UGeckoInstruction _inst)
 {
 	u32 uAddress = Helper_Get_EA_U(_inst);
 	u32 uTemp = Memory::Read_U32(uAddress);
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 	m_GPR[_inst.RA] = uAddress;
 }

@ -109,16 +114,18 @@ void lfsux(UGeckoInstruction _inst)
 {
 	u32 uAddress = Helper_Get_EA_UX(_inst);
 	u32 uTemp = Memory::Read_U32(uAddress);
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 	m_GPR[_inst.RA] = uAddress;
 }

 void lfsx(UGeckoInstruction _inst)
 {
 	u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst));
-	rPS0(_inst.FD) = *(float*)&uTemp;
-	rPS1(_inst.FD) = rPS0(_inst.FD);
+	double value = *(float*)&uTemp;
+	rPS0(_inst.FD) = value;
+	rPS1(_inst.FD) = value;
 }

 void lha(UGeckoInstruction _inst)
@ -227,7 +234,8 @@ void stfdu(UGeckoInstruction _inst)

 void stfs(UGeckoInstruction _inst)
 {
-	float fTemp = (float)rPS0(_inst.FS);
+	double value = rPS0(_inst.FS);
+	float fTemp = (float)value;
 	Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst));
 }

@ -453,27 +461,20 @@ void stfiwx(UGeckoInstruction _inst)
 	Memory::Write_U32((u32)riPS0(_inst.FS), uAddress);
 }

-// __________________________________________________________________________________________________
-// stfsux
-//
-// no paired ??
-//
+
 void stfsux(UGeckoInstruction _inst)
 {
-	float fTemp = (float)rPS0(_inst.FS);
+	double value = rPS0(_inst.FS);
+	float fTemp = (float)value;
 	u32 uAddress = Helper_Get_EA_UX(_inst);
 	Memory::Write_U32(*(u32*)&fTemp, uAddress);
 	m_GPR[_inst.RA] = uAddress;
 }

-// __________________________________________________________________________________________________
-// stfsx
-//
-// no paired ??
-//
 void stfsx(UGeckoInstruction _inst)
 {
-	float fTemp = (float)rPS0(_inst.FS);
+	double value = rPS0(_inst.FS);
+	float fTemp = (float)value;
 	Memory::Write_U32(*(u32 *)&fTemp, Helper_Get_EA_X(_inst));
 }

--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp
@ -16,41 +16,52 @@
 // http://code.google.com/p/dolphin-emu/

 #include <math.h>
+#include "Common.h"
+#include "MathUtil.h"
 #include "Interpreter.h"
 #include "../../HW/Memmap.h"

+using namespace MathUtil;
+
 namespace Interpreter
 {

 // These "binary instructions" do not alter FPSCR.
 void ps_sel(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) >= -0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB));
-	rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) >= -0.0) ? rPS1(_inst.FC) : rPS1(_inst.FB));
+	rPS0(_inst.FD) = !IsNAN(rPS0(_inst.FA)) && rPS0(_inst.FA) >= -0.0 ?
+		              rPS0(_inst.FC) : rPS0(_inst.FB);
+	rPS1(_inst.FD) = !IsNAN(rPS1(_inst.FA)) && rPS1(_inst.FA) >= -0.0 ?
+		              rPS1(_inst.FC) : rPS1(_inst.FB);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_neg(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) ^ (1ULL << 63);
 	riPS1(_inst.FD) = riPS1(_inst.FB) ^ (1ULL << 63);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_mr(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = rPS0(_inst.FB);
 	rPS1(_inst.FD) = rPS1(_inst.FB);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_nabs(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63); 
 	riPS1(_inst.FD) = riPS1(_inst.FB) | (1ULL << 63); 
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_abs(UGeckoInstruction _inst)
 {
 	riPS0(_inst.FD) = riPS0(_inst.FB) &~ (1ULL << 63); 
 	riPS1(_inst.FD) = riPS1(_inst.FB) &~ (1ULL << 63); 
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 // These are just moves, double is OK.
@ -60,6 +71,7 @@ void ps_merge00(UGeckoInstruction _inst)
 	double p1 = rPS0(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_merge01(UGeckoInstruction _inst)
@ -68,6 +80,7 @@ void ps_merge01(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_merge10(UGeckoInstruction _inst)
@ -76,6 +89,7 @@ void ps_merge10(UGeckoInstruction _inst)
 	double p1 = rPS0(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_merge11(UGeckoInstruction _inst)
@ -84,6 +98,7 @@ void ps_merge11(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }


@ -97,63 +112,75 @@ void ps_div(UGeckoInstruction _inst)
 	if (fabs(rPS0(_inst.FB)) == 0.0) {
 		FPSCR.ZX = 1;
 	}
-}
-
-void ps_sub(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
-	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
-}
-
-void ps_add(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
-	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_res(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = 1.0f / static_cast<float>(rPS0(_inst.FB));
 	rPS1(_inst.FD) = 1.0f / static_cast<float>(rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void ps_rsqrte(UGeckoInstruction _inst)
+{
+	// PanicAlert("ps_rsqrte");
+	rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB)));
+	rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
+	if (fabs(rPS0(_inst.FB)) == 0.0) {
+		FPSCR.ZX = 1;
+	}
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void ps_sub(UGeckoInstruction _inst)
+{
+	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
+	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
+}
+
+void ps_add(UGeckoInstruction _inst)
+{
+	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
+	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_mul(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
 	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) * rPS1(_inst.FC));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

-void ps_rsqrte(UGeckoInstruction _inst)
-{
-	rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB)));
-	rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
-	if (fabs(rPS0(_inst.FB)) == 0.0) {
-		FPSCR.ZX = 1;
-	}
-}

 void ps_msub(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
 	rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_madd(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
 	rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_nmsub(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB)));
 	rPS1(_inst.FD) = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB)));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_nmadd(UGeckoInstruction _inst)
 {
 	rPS0(_inst.FD) = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB)));
 	rPS1(_inst.FD) = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB)));
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_sum0(UGeckoInstruction _inst)
@ -162,6 +189,7 @@ void ps_sum0(UGeckoInstruction _inst)
 	double p1 = (float)(rPS1(_inst.FC));
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_sum1(UGeckoInstruction _inst)
@ -170,6 +198,7 @@ void ps_sum1(UGeckoInstruction _inst)
 	double p1 = rPS0(_inst.FA) + rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_muls0(UGeckoInstruction _inst)
@ -178,6 +207,7 @@ void ps_muls0(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FA) * rPS0(_inst.FC);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_muls1(UGeckoInstruction _inst)
@ -186,6 +216,7 @@ void ps_muls1(UGeckoInstruction _inst)
 	double p1 = rPS1(_inst.FA) * rPS1(_inst.FC);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_madds0(UGeckoInstruction _inst)
@ -194,6 +225,7 @@ void ps_madds0(UGeckoInstruction _inst)
 	double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_madds1(UGeckoInstruction _inst)
@ -202,6 +234,7 @@ void ps_madds1(UGeckoInstruction _inst)
 	double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
 	rPS0(_inst.FD) = p0;
 	rPS1(_inst.FD) = p1;
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_cmpu0(UGeckoInstruction _inst)
@ -209,10 +242,12 @@ void ps_cmpu0(UGeckoInstruction _inst)
 	double fa = rPS0(_inst.FA);
 	double fb = rPS0(_inst.FB);
 	int compareResult;
-	if (fa < fb)		compareResult = 8; 
-	else if (fa > fb) 	compareResult = 4; 
-	else				compareResult = 2;
+	if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
+	else if (fa < fb)         	compareResult = 8; 
+	else if (fa > fb)        	compareResult = 4; 
+	else			        	compareResult = 2;
 	SetCRField(_inst.CRFD, compareResult);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_cmpo0(UGeckoInstruction _inst)
@ -226,10 +261,12 @@ void ps_cmpu1(UGeckoInstruction _inst)
 	double fa = rPS1(_inst.FA);
 	double fb = rPS1(_inst.FB);
 	int compareResult;
-	if (fa < fb)		compareResult = 8; 
-	else if (fa > fb)	compareResult = 4; 
-	else				compareResult = 2;
+	if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
+	else if (fa < fb)         	compareResult = 8; 
+	else if (fa > fb)        	compareResult = 4; 
+	else			        	compareResult = 2;
 	SetCRField(_inst.CRFD, compareResult);
+	if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
 }

 void ps_cmpo1(UGeckoInstruction _inst)
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@ -35,6 +35,7 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
 #include <xmmintrin.h>
 #endif

+#include "CPUDetect.h"
 #include "../../CoreTiming.h"
 #include "../../HW/Memmap.h"
 #include "../../HW/GPFifo.h"
@ -60,37 +61,11 @@ mffsx: 80036650 (huh?)
 namespace Interpreter
 {

-void UpdateSSEState()
-{
-	u32 csr = _mm_getcsr();
-	
-	const int ssetable[4] = 
-	{
-		0,
-		3,
-		2,
-		1,
-	};
-	csr = csr & 0x9FFF;
-	csr |= ssetable[FPSCR.RN] << 13;
+const u32 MASKS = 0x1F80;  // mask all SSE floating-point exceptions (MXCSR mask bits IM..PM).
+const u32 DAZ = 0x40;
+const u32 FTZ = 0x8000;

-	// Also handle denormals as zero (FZ + DAZ)
-	csr &= ~0x8020;
-
-	// SETTING FTZ+DAZ KILLS BEYOND GOOD AND EVIL
-	//if (daz)
-	//	csr |= 0x20; // Only set DAZ  //0x8020;
-	
-	_mm_setcsr(csr);
-}
-
-void RestoreSSEState()
-{
-	// A reasonable default
-	_mm_setcsr(0x1fa0);
-}
-
-void UpdateFPSCR(UReg_FPSCR fp)
+void FPSCRtoFPUSettings(UReg_FPSCR fp)
 {
 	// Set FPU rounding mode to mimic the PowerPC's
 #ifdef _M_IX86
@ -120,12 +95,28 @@ void UpdateFPSCR(UReg_FPSCR fp)
 #endif
 	if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
 	{
-		// PanicAlert("FPSCR - exceptions enabled. Please report.");
+		//PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
+		//	fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
 		// Pokemon Colosseum does this. Gah.
 	}

 	// Also corresponding SSE rounding mode setting
-	UpdateSSEState();
+	static const u32 ssetable[4] = 
+	{
+		(0 << 13) | MASKS,
+		(3 << 13) | MASKS,
+		(2 << 13) | MASKS,
+		(1 << 13) | MASKS,
+	};
+	u32 csr = ssetable[FPSCR.RN];
+	if (FPSCR.NI)
+	{
+		// Either one of these two breaks Beyond Good & Evil.
+		// if (cpu_info.bSSSE3)
+		//     csr |= DAZ;
+		// csr |= FTZ;
+	}
+	_mm_setcsr(csr);
 }

 void mcrfs(UGeckoInstruction _inst)
@ -158,25 +149,9 @@ void mcrfs(UGeckoInstruction _inst)
 		break;
 	}
 	SetCRField(_inst.CRFD, fpflags);
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 }

-#define MXCSR_IE 1
-#define MXCSR_DE 2  // denormal
-#define MXCSR_ZE 4  // divide by zero, sticky
-#define MXCSR_OE 8  // overflow
-#define MXCSR_UE 16 // underflow
-#define MXCSR_PE 32 // precision
-#define MXCSR_DAZ 64
-#define MXCSR_IM 128
-#define MXCSR_DM 256
-#define MXCSR_ZM 512
-#define MXCSR_OM 1024
-#define MXCSR_UM 2048
-#define MXCSR_PM 4096
-#define MXCSR_ROUND (16384|8192)
-#define MXCSR_FLUSH 32768
-
 void mffsx(UGeckoInstruction _inst)
 {
 	// load from FPSCR
@ -190,14 +165,14 @@ void mffsx(UGeckoInstruction _inst)
 void mtfsb0x(UGeckoInstruction _inst)
 {
 	FPSCR.Hex &= (~(0x80000000 >> _inst.CRBD));
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsb0x: inst_.Rc");
 }

 void mtfsb1x(UGeckoInstruction _inst)
 {
 	FPSCR.Hex |= 0x80000000 >> _inst.CRBD;
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsb1x: inst_.Rc");
 }

@ -206,7 +181,7 @@ void mtfsfix(UGeckoInstruction _inst)
 	u32 mask = (0xF0000000 >> (4 * _inst.CRFD));
 	u32 imm = (_inst.hex << 16) & 0xF0000000;
 	FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD));
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc");
 }

@ -214,13 +189,14 @@ void mtfsfx(UGeckoInstruction _inst)
 {
 	u32 fm = _inst.FM;
 	u32 m = 0;
-	for (int i = 0; i < 8; i++) {  //7?? todo check
+	for (int i = 0; i < 8; i++)  // TODO: confirm the bound - 8 covers all eight 4-bit FPSCR fields (or should it be 7?)
+	{
 		if (fm & (1 << i))
-			m |= (0xF << (i*4));
+			m |= (0xF << (i * 4));
 	}

 	FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
-	UpdateFPSCR(FPSCR);
+	FPSCRtoFPUSettings(FPSCR);
 	if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc");
 }

--- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp
@ -363,7 +363,7 @@ void OnIdleIL()

 void UpdateFPRF(double dvalue)
 {
-	FPSCR.FPRF = MathUtil::ClassifyFP(dvalue);
+	FPSCR.FPRF = MathUtil::ClassifyDouble(dvalue);
 	//if (FPSCR.FPRF == 0x11)
 	//	PanicAlert("QNAN alert");
 }
--- a/Source/DSPSpy/DSPSpy.vcproj
+++ b/Source/DSPSpy/DSPSpy.vcproj
@ -122,6 +122,10 @@
 			RelativePath=".\ConsoleHelper.h"
 			>
 		</File>
+		<File
+			RelativePath=".\tests\dsp_base.inc"
+			>
+		</File>
 		<File
 			RelativePath=".\dsp_interface.cpp"
 			>
--- a/Source/Plugins/Plugin_DSP_HLE/Src/HLEMixer.cpp
+++ b/Source/Plugins/Plugin_DSP_HLE/Src/HLEMixer.cpp
@ -13,30 +13,30 @@
 // If not, see http://www.gnu.org/licenses/

 // Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
-
-#include "Config.h" // Local
-#include "Globals.h"
-#include "DSPHandler.h"
-#include "HLEMixer.h"
-
-void HLEMixer::MixUCode(short *samples, int numSamples) {
-	// if this was called directly from the HLE, and not by timeout
-	if (g_Config.m_EnableHLEAudio && IsHLEReady()) {
-		IUCode* pUCode = CDSPHandler::GetInstance().GetUCode();
-		if (pUCode != NULL)
-			pUCode->MixAdd(samples, numSamples);
-	}
-}
-
-void HLEMixer::Premix(short *samples, int numSamples) {
-	
-	// first get the DTK Music
-	//	if (g_Config.m_EnableDTKMusic) {
-	//		g_dspInitialize.pGetAudioStreaming(samples, numSamples);
-	//	}
-
-	MixUCode(samples, numSamples);
-}
-
-
+// http://code.google.com/p/dolphin-emu/
+
+#include "Config.h" // Local
+#include "Globals.h"
+#include "DSPHandler.h"
+#include "HLEMixer.h"
+
+void HLEMixer::MixUCode(short *samples, int numSamples) {
+	// if this was called directly from the HLE, and not by timeout
+	if (g_Config.m_EnableHLEAudio && IsHLEReady()) {
+		IUCode* pUCode = CDSPHandler::GetInstance().GetUCode();
+		if (pUCode != NULL)
+			pUCode->MixAdd(samples, numSamples);
+	}
+}
+
+void HLEMixer::Premix(short *samples, int numSamples) {
+	
+	// first get the DTK Music
+	//	if (g_Config.m_EnableDTKMusic) {
+	//		g_dspInitialize.pGetAudioStreaming(samples, numSamples);
+	//	}
+
+	MixUCode(samples, numSamples);
+}
+
+
--- a/Source/Plugins/Plugin_Wiimote/Src/EmuSubroutines.cpp
+++ b/Source/Plugins/Plugin_Wiimote/Src/EmuSubroutines.cpp
@ -285,8 +285,8 @@ void WmReadData(u16 _channelID, wm_read_data* rd)
 					size, address, (address & 0xffff), Tmp.c_str());*/
 			break;
 		default:
-			ERROR_LOG(WII_IPC_WIIMOTE, "WmWriteData: bad register block!");
-			PanicAlert("WmWriteData: bad register block!");
+			ERROR_LOG(WII_IPC_WIIMOTE, "WmReadData: bad register block!");
+			PanicAlert("WmReadData: bad register block!");
 			return;
 		}

@ -340,6 +340,7 @@ void WmReadData(u16 _channelID, wm_read_data* rd)

 	INFO_LOG(WII_IPC_WIIMOTE, "===========================================================");
 }
+
 // ===================================================
 /* Here we produce the actual 0x21 Input report that we send to the Wii. The message
   is divided into 16-byte pieces and sent piece by piece. There will be five formatting
--- a/Source/UnitTests/UnitTests.cpp
+++ b/Source/UnitTests/UnitTests.cpp
@ -55,21 +55,35 @@ void CoreTests()
 void MathTests()
 {
 	// Tests that our fp classifier is correct.
-	EXPECT_EQ(MathUtil::ClassifyFP(1.0), MathUtil::PPC_FPCLASS_PN);
-	EXPECT_EQ(MathUtil::ClassifyFP(-1.0), 0x8);
-	EXPECT_EQ(MathUtil::ClassifyFP(1235223.0), 0x4);
-	EXPECT_EQ(MathUtil::ClassifyFP(-126323521.0), 0x8);
-	EXPECT_EQ(MathUtil::ClassifyFP(1.0E-308), 0x14);
-	EXPECT_EQ(MathUtil::ClassifyFP(-1.0E-308), 0x18);
-	EXPECT_EQ(MathUtil::ClassifyFP(0.0), 0x2);
-	EXPECT_EQ(MathUtil::ClassifyFP(-0.0), 0x12);
-	EXPECT_EQ(MathUtil::ClassifyFP(HUGE_VAL), 0x5);  // weird #define for infinity
-	EXPECT_EQ(MathUtil::ClassifyFP(-HUGE_VAL), 0x9);
-	EXPECT_EQ(MathUtil::ClassifyFP(sqrt(-1.0)), 0x11);  // SNAN
+	EXPECT_EQ(MathUtil::ClassifyDouble(1.0),        MathUtil::PPC_FPCLASS_PN);
+	EXPECT_EQ(MathUtil::ClassifyDouble(-1.0),       MathUtil::PPC_FPCLASS_NN);
+	EXPECT_EQ(MathUtil::ClassifyDouble(1235223.0),  MathUtil::PPC_FPCLASS_PN);
+	EXPECT_EQ(MathUtil::ClassifyDouble(-1263221.0), MathUtil::PPC_FPCLASS_NN);
+	EXPECT_EQ(MathUtil::ClassifyDouble(1.0E-308),   MathUtil::PPC_FPCLASS_PD);
+	EXPECT_EQ(MathUtil::ClassifyDouble(-1.0E-308),  MathUtil::PPC_FPCLASS_ND);
+	EXPECT_EQ(MathUtil::ClassifyDouble(0.0),        MathUtil::PPC_FPCLASS_PZ);
+	EXPECT_EQ(MathUtil::ClassifyDouble(-0.0),       MathUtil::PPC_FPCLASS_NZ);
+	EXPECT_EQ(MathUtil::ClassifyDouble(HUGE_VAL),   MathUtil::PPC_FPCLASS_PINF);  // weird #define for infinity
+	EXPECT_EQ(MathUtil::ClassifyDouble(-HUGE_VAL),  MathUtil::PPC_FPCLASS_NINF);
+	EXPECT_EQ(MathUtil::ClassifyDouble(sqrt(-1.0)), MathUtil::PPC_FPCLASS_QNAN);
+
+	// Float version
+	EXPECT_EQ(MathUtil::ClassifyFloat(1.0f),        MathUtil::PPC_FPCLASS_PN);
+	EXPECT_EQ(MathUtil::ClassifyFloat(-1.0f),       MathUtil::PPC_FPCLASS_NN);
+	EXPECT_EQ(MathUtil::ClassifyFloat(1235223.0f),  MathUtil::PPC_FPCLASS_PN);
+	EXPECT_EQ(MathUtil::ClassifyFloat(-1263221.0f), MathUtil::PPC_FPCLASS_NN);
+	EXPECT_EQ(MathUtil::ClassifyFloat(1.0E-43f),    MathUtil::PPC_FPCLASS_PD);
+	EXPECT_EQ(MathUtil::ClassifyFloat(-1.0E-43f),   MathUtil::PPC_FPCLASS_ND);
+	EXPECT_EQ(MathUtil::ClassifyFloat(0.0f),        MathUtil::PPC_FPCLASS_PZ);
+	EXPECT_EQ(MathUtil::ClassifyFloat(-0.0f),       MathUtil::PPC_FPCLASS_NZ);
+	EXPECT_EQ(MathUtil::ClassifyFloat((float)HUGE_VAL),  MathUtil::PPC_FPCLASS_PINF);  // weird #define for infinity
+	EXPECT_EQ(MathUtil::ClassifyFloat((float)-HUGE_VAL), MathUtil::PPC_FPCLASS_NINF);
+	EXPECT_EQ(MathUtil::ClassifyFloat(sqrtf(-1.0f)),     MathUtil::PPC_FPCLASS_QNAN);

 	EXPECT_FALSE(MathUtil::IsNAN(1.0));
 	EXPECT_TRUE(MathUtil::IsNAN(sqrt(-1.0)));
 	EXPECT_FALSE(MathUtil::IsSNAN(sqrt(-1.0)));
+
 	// EXPECT_TRUE(MathUtil::IsQNAN(sqrt(-1.0)));  // Hmm...
 	EXPECT_EQ(pow2(2.0), 4.0);
 	EXPECT_EQ(pow2(-2.0), 4.0);