Merge branch 'ppc_fp'

2025-07-24 14:49:42 -06:00 · 2013-11-18 19:31:09 +01:00
parent e805bf6068 288bef2807
commit b863e40677
11 changed files with 93 additions and 50 deletions
--- a/Source/Core/Common/Src/CPUDetect.h
+++ b/Source/Core/Common/Src/CPUDetect.h
@ -43,6 +43,12 @@ struct CPUInfo
 	bool bAVX;
 	bool bFMA;
 	bool bAES;
+	// Set when the CPU supports the FXSAVE/FXRSTOR instructions
+	bool bFXSR;
+	// This flag indicates that the hardware supports some mode
+	// in which denormal inputs _and_ outputs are automatically set to (signed) zero.
+	// TODO: ARM
+	bool bFlushToZero;
 	bool bLAHFSAHF64;
 	bool bLongMode;

--- a/Source/Core/Common/Src/FPURoundMode.h
+++ b/Source/Core/Common/Src/FPURoundMode.h
@ -36,7 +36,7 @@ namespace FPURoundMode

 	void SetPrecisionMode(u32 mode);

-	void SetSIMDMode(u32 mode);
+	void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode);

 /*
 * There are two different flavors of float to int conversion:
--- a/Source/Core/Common/Src/GenericFPURoundMode.cpp
+++ b/Source/Core/Common/Src/GenericFPURoundMode.cpp
@ -26,7 +26,7 @@ namespace FPURoundMode
 	void SetPrecisionMode(u32 mode)
 	{
 	}
-	void SetSIMDMode(u32 mode)
+	void SetSIMDMode(u32 mode, u32 nonIEEEMode)
 	{
 	}
 	void SaveSIMDState()
--- a/Source/Core/Common/Src/MathUtil.h
+++ b/Source/Core/Common/Src/MathUtil.h
@ -64,10 +64,10 @@ inline float FlushToZero(float f)
 	return x.f;
 }

-inline double FlushToZeroAsFloat(double d)
+inline double FlushToZero(double d)
 {
 	IntDouble x; x.d = d;
-	if ((x.i & DOUBLE_EXP) < 0x3800000000000000ULL)
+	if ((x.i & DOUBLE_EXP) == 0)
 		x.i &= DOUBLE_SIGN;  // turn into signed zero
 	return x.d;
 }
--- a/Source/Core/Common/Src/x64CPUDetect.cpp
+++ b/Source/Core/Common/Src/x64CPUDetect.cpp
@ -162,6 +162,34 @@ void CPUInfo::Detect()
 		if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
 		if ((cpu_id[2] >> 25) & 1) bAES = true;

+		// To check DAZ support, we first need to check FXSAVE support.
+		if ((cpu_id[3] >> 24) & 1)
+		{
+			// We can use FXSAVE.
+			bFXSR = true;
+
+			GC_ALIGNED16(u8 fx_state[512]);
+			memset(fx_state, 0, sizeof(fx_state));
+#ifdef _WIN32
+#ifdef _M_IX86
+			_fxsave(fx_state);
+#elif defined (_M_X64)
+			_fxsave64(fx_state);
+#endif
+#else
+			__asm__("fxsave %0" : "=m" (fx_state));
+#endif
+
+			// Lowest byte of MXCSR_MASK (offset 0x1C of the FXSAVE area); bit 6 is the DAZ bit.
+			if ((fx_state[0x1C] >> 6) & 1)
+			{
+				// On x86, the FTZ field (supported since SSE1) only flushes denormal _outputs_ to zero.
+				// Now that we have also confirmed DAZ support (flushing denormal _inputs_ to zero),
+				// we can set our generic flag.
+				bFlushToZero = true;
+			}
+		}
+
 		// AVX support requires 3 separate checks:
 		//  - Is the AVX bit set in CPUID?
 		//  - Is the XSAVE bit set in CPUID?
@ -222,7 +250,12 @@ std::string CPUInfo::Summarize()
 {
 	std::string sum(cpu_string);
 	if (bSSE) sum += ", SSE";
-	if (bSSE2) sum += ", SSE2";
+	if (bSSE2)
+	{
+		sum += ", SSE2";
+		if (!bFlushToZero)
+			sum += " (but not DAZ!)";
+	}
 	if (bSSE3) sum += ", SSE3";
 	if (bSSSE3) sum += ", SSSE3";
 	if (bSSE4_1) sum += ", SSE4.1";
--- a/Source/Core/Common/Src/x64FPURoundMode.cpp
+++ b/Source/Core/Common/Src/x64FPURoundMode.cpp
@ -4,6 +4,7 @@

 #include "Common.h"
 #include "FPURoundMode.h"
+#include "CPUDetect.h"

 #ifndef _WIN32
 static const unsigned short FPU_ROUND_NEAR = 0 << 10;
@ -14,8 +15,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
 #include <xmmintrin.h>
 #endif

-const u32 MASKS = 0x1F80;  // mask away the interrupts.
+// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
+const u32 EXCEPTION_MASK = 0x1F80;
+// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
 const u32 DAZ = 0x40;
+// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
 const u32 FTZ = 0x8000;

 namespace FPURoundMode
@ -79,16 +83,28 @@ namespace FPURoundMode
 			//but still - set any useful sse options here
 		#endif
 	}
-	void SetSIMDMode(u32 mode)
+
+	void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
 	{
-		static const u32 ssetable[4] =
+		// lookup table for FPSCR.RN-to-MXCSR.RC translation
+		static const u32 roundingModeLUT[4] =
 		{
-			(0 << 13) | MASKS,
-			(3 << 13) | MASKS,
-			(2 << 13) | MASKS,
-			(1 << 13) | MASKS,
+			(0 << 13) | EXCEPTION_MASK, // nearest
+			(3 << 13) | EXCEPTION_MASK, // -inf
+			(2 << 13) | EXCEPTION_MASK, // +inf
+			(1 << 13) | EXCEPTION_MASK, // zero
 		};
-		u32 csr = ssetable[mode];
+		u32 csr = roundingModeLUT[roundingMode];
+
+		static const u32 denormalLUT[2] =
+		{
+			FTZ,       // flush-to-zero only
+			FTZ | DAZ, // flush-to-zero and denormals-are-zero (selected only when the CPU supports DAZ)
+		};
+		if (nonIEEEMode)
+		{
+			csr |= denormalLUT[cpu_info.bFlushToZero];
+		}
 		_mm_setcsr(csr);
 	}