mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-22 22:00:39 -06:00
Interpreter: software-based flush-to-zero
bDAZ is now called bFlushToZero to better reflect what it's actually used for. I decided not to support any hardware-based flush-to-zero on systems that don't support it for both inputs _and_ outputs. This makes the code cleaner, and the set of CPUs that support SSE2 but not DAZ should be very small.
This commit is contained in:
@ -45,7 +45,10 @@ struct CPUInfo
|
||||
bool bAES;
|
||||
// FXSAVE/FXRSTOR
|
||||
bool bFXSR;
|
||||
bool bDAZ;
|
||||
// This flag indicates that the hardware supports some mode
|
||||
// in which denormal inputs _and_ outputs are automatically set to (signed) zero.
|
||||
// TODO: ARM
|
||||
bool bFlushToZero;
|
||||
bool bLAHFSAHF64;
|
||||
bool bLongMode;
|
||||
|
||||
|
@ -64,10 +64,10 @@ inline float FlushToZero(float f)
|
||||
return x.f;
|
||||
}
|
||||
|
||||
inline double FlushToZeroAsFloat(double d)
|
||||
inline double FlushToZero(double d)
|
||||
{
|
||||
IntDouble x; x.d = d;
|
||||
if ((x.i & DOUBLE_EXP) < 0x3800000000000000ULL)
|
||||
if ((x.i & DOUBLE_EXP) == 0)
|
||||
x.i &= DOUBLE_SIGN; // turn into signed zero
|
||||
return x.d;
|
||||
}
|
||||
|
@ -162,6 +162,7 @@ void CPUInfo::Detect()
|
||||
if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
|
||||
if ((cpu_id[2] >> 25) & 1) bAES = true;
|
||||
|
||||
// To check DAZ support, we first need to check FXSAVE support.
|
||||
if ((cpu_id[3] >> 24) & 1)
|
||||
{
|
||||
// We can use FXSAVE.
|
||||
@ -181,7 +182,12 @@ void CPUInfo::Detect()
|
||||
|
||||
// lowest byte of MXCSR_MASK
|
||||
if ((fx_state[0x1C] >> 6) & 1)
|
||||
bDAZ = true;
|
||||
{
|
||||
// On x86, the FTZ field (supported since SSE1) only flushes denormal _outputs_ to zero;
|
||||
// now that we checked DAZ support (flushing denormal _inputs_ to zero),
|
||||
// we can set our generic flag.
|
||||
bFlushToZero = true;
|
||||
}
|
||||
}
|
||||
|
||||
// AVX support requires 3 separate checks:
|
||||
|
@ -103,7 +103,7 @@ namespace FPURoundMode
|
||||
};
|
||||
if (nonIEEEMode)
|
||||
{
|
||||
csr |= denormalLUT[cpu_info.bDAZ];
|
||||
csr |= denormalLUT[cpu_info.bFlushToZero];
|
||||
}
|
||||
_mm_setcsr(csr);
|
||||
}
|
||||
|
Reference in New Issue
Block a user