mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-24 14:49:42 -06:00
Merge branch 'ppc_fp'
This commit is contained in:
@ -43,6 +43,12 @@ struct CPUInfo
|
||||
bool bAVX;
|
||||
bool bFMA;
|
||||
bool bAES;
|
||||
// FXSAVE/FXRSTOR
|
||||
bool bFXSR;
|
||||
// This flag indicates that the hardware supports some mode
|
||||
// in which denormal inputs _and_ outputs are automatically set to (signed) zero.
|
||||
// TODO: ARM
|
||||
bool bFlushToZero;
|
||||
bool bLAHFSAHF64;
|
||||
bool bLongMode;
|
||||
|
||||
|
@ -36,7 +36,7 @@ namespace FPURoundMode
|
||||
|
||||
void SetPrecisionMode(u32 mode);
|
||||
|
||||
void SetSIMDMode(u32 mode);
|
||||
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode);
|
||||
|
||||
/*
|
||||
* There are two different flavors of float to int conversion:
|
||||
|
@ -26,7 +26,7 @@ namespace FPURoundMode
|
||||
// No-op in this implementation: the body is empty, so `mode` is ignored.
void SetPrecisionMode(u32 mode)
{
}
|
||||
// No-op in this implementation: the body is empty, so both arguments are ignored.
// The signature mirrors the two-argument declaration
// SetSIMDMode(u32 roundingMode, u32 nonIEEEMode).
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
{
}
|
||||
void SaveSIMDState()
|
||||
|
@ -64,10 +64,10 @@ inline float FlushToZero(float f)
|
||||
return x.f;
|
||||
}
|
||||
|
||||
inline double FlushToZeroAsFloat(double d)
|
||||
inline double FlushToZero(double d)
|
||||
{
|
||||
IntDouble x; x.d = d;
|
||||
if ((x.i & DOUBLE_EXP) < 0x3800000000000000ULL)
|
||||
if ((x.i & DOUBLE_EXP) == 0)
|
||||
x.i &= DOUBLE_SIGN; // turn into signed zero
|
||||
return x.d;
|
||||
}
|
||||
|
@ -162,6 +162,34 @@ void CPUInfo::Detect()
|
||||
if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
|
||||
if ((cpu_id[2] >> 25) & 1) bAES = true;
|
||||
|
||||
// To check DAZ support, we first need to check FXSAVE support.
|
||||
if ((cpu_id[3] >> 24) & 1)
|
||||
{
|
||||
// We can use FXSAVE.
|
||||
bFXSR = true;
|
||||
|
||||
GC_ALIGNED16(u8 fx_state[512]);
|
||||
memset(fx_state, 0, sizeof(fx_state));
|
||||
#ifdef _WIN32
|
||||
#ifdef _M_IX86
|
||||
_fxsave(fx_state);
|
||||
#elif defined (_M_X64)
|
||||
_fxsave64(fx_state);
|
||||
#endif
|
||||
#else
|
||||
__asm__("fxsave %0" : "=m" (fx_state));
|
||||
#endif
|
||||
|
||||
// lowest byte of MXCSR_MASK
|
||||
if ((fx_state[0x1C] >> 6) & 1)
|
||||
{
|
||||
// On x86, the FTZ field (supported since SSE1) only flushes denormal _outputs_ to zero.
|
||||
// Now that DAZ support (flushing denormal _inputs_ to zero) has been confirmed as well,
|
||||
// we can set our generic flag.
|
||||
bFlushToZero = true;
|
||||
}
|
||||
}
|
||||
|
||||
// AVX support requires 3 separate checks:
|
||||
// - Is the AVX bit set in CPUID?
|
||||
// - Is the XSAVE bit set in CPUID?
|
||||
@ -222,7 +250,12 @@ std::string CPUInfo::Summarize()
|
||||
{
|
||||
std::string sum(cpu_string);
|
||||
if (bSSE) sum += ", SSE";
|
||||
if (bSSE2) sum += ", SSE2";
|
||||
if (bSSE2)
|
||||
{
|
||||
sum += ", SSE2";
|
||||
if (!bFlushToZero)
|
||||
sum += " (but not DAZ!)";
|
||||
}
|
||||
if (bSSE3) sum += ", SSE3";
|
||||
if (bSSSE3) sum += ", SSSE3";
|
||||
if (bSSE4_1) sum += ", SSE4.1";
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include "Common.h"
|
||||
#include "FPURoundMode.h"
|
||||
#include "CPUDetect.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
static const unsigned short FPU_ROUND_NEAR = 0 << 10;
|
||||
@ -14,8 +15,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
// Older name carrying the same value as EXCEPTION_MASK below.
const u32 MASKS = 0x1F80; // mask away the interrupts.
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
const u32 EXCEPTION_MASK = 0x1F80;
// Denormals-Are-Zero, MXCSR bit 6 (non-IEEE mode: denormal inputs are set to +/- 0)
const u32 DAZ = 0x40;
// Flush-To-Zero, MXCSR bit 15 (non-IEEE mode: denormal outputs are set to +/- 0)
const u32 FTZ = 0x8000;
|
||||
|
||||
namespace FPURoundMode
|
||||
@ -79,16 +83,28 @@ namespace FPURoundMode
|
||||
//but still - set any useful sse options here
|
||||
#endif
|
||||
}
|
||||
// Programs the SSE control/status register (MXCSR) via _mm_setcsr.
//
// roundingMode: FPSCR.RN-style rounding selector used as an index into the
//               translation table below (0 = nearest, 1 = toward zero,
//               2 = +inf, 3 = -inf). Only the two low bits are used.
// nonIEEEMode:  when non-zero, denormal flushing is enabled: FTZ always, and
//               DAZ additionally if cpu_info.bFlushToZero reports support.
//
// Every value written also contains EXCEPTION_MASK, so FPU exceptions stay
// disabled regardless of the selected mode.
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
{
    // lookup table for FPSCR.RN-to-MXCSR.RC translation
    // (MXCSR.RC lives in bits 13-14: 0 = nearest, 1 = down, 2 = up, 3 = truncate)
    static const u32 roundingModeLUT[4] =
    {
        (0 << 13) | EXCEPTION_MASK, // nearest
        (3 << 13) | EXCEPTION_MASK, // zero
        (2 << 13) | EXCEPTION_MASK, // +inf
        (1 << 13) | EXCEPTION_MASK, // -inf
    };

    // The table has exactly four entries; masking keeps an out-of-range
    // argument from reading past its end.
    u32 csr = roundingModeLUT[roundingMode & 3];

    if (nonIEEEMode)
    {
        // flush-to-zero (denormal outputs) is always requested
        csr |= FTZ;

        // denormals-are-zero (denormal inputs) may not be supported,
        // so it is only requested when detection has confirmed it
        if (cpu_info.bFlushToZero)
            csr |= DAZ;
    }

    _mm_setcsr(csr);
}
|
||||
|
||||
|
Reference in New Issue
Block a user