dolphin/Source/Core/Common/ArmFPURoundMode.cpp
JosJuice 9db0ebd4b6 PowerPC: Set host CPU rounding mode on init and savestate
Not doing this can cause desyncs when TASing. (I don't know
how common such desyncs would be, though. For games that
don't change rounding modes, they shouldn't be a problem.)
2021-06-10 20:12:15 +02:00

96 lines
2.5 KiB
C++

// Copyright 2021 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/CPUDetect.h"
#include "Common/CommonTypes.h"
#include "Common/FPURoundMode.h"
#include "Common/Logging/Log.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
static u64 GetFPCR()
{
#ifdef _MSC_VER
return _ReadStatusReg(ARM64_FPCR);
#else
u64 fpcr;
__asm__ __volatile__("mrs %0, fpcr" : "=r"(fpcr));
return fpcr;
#endif
}
static void SetFPCR(u64 fpcr)
{
#ifdef _MSC_VER
_WriteStatusReg(ARM64_FPCR, fpcr);
#else
__asm__ __volatile__("msr fpcr, %0" : : "ri"(fpcr));
#endif
}
namespace FPURoundMode
{
static const u64 default_fpcr = GetFPCR();
static u64 saved_fpcr = default_fpcr;
void SetRoundMode(int mode)
{
// We don't need to do anything here since SetSIMDMode is always called after calling this
}
void SetSIMDMode(int rounding_mode, bool non_ieee_mode)
{
// When AH is disabled, FZ controls flush-to-zero for both inputs and outputs. When AH is enabled,
// FZ controls flush-to-zero for outputs, and FIZ controls flush-to-zero for inputs.
constexpr u32 FZ = 1 << 24;
constexpr u32 AH = 1 << 1;
constexpr u32 FIZ = 1 << 0;
constexpr u32 flush_to_zero_mask = FZ | AH | FIZ;
// On CPUs with FEAT_AFP support, setting AH = 1, FZ = 1, FIZ = 0 emulates the GC/Wii CPU's
// "non-IEEE mode". Unfortunately, FEAT_AFP didn't exist until 2020, so we can't count on setting
// AH actually doing anything. But flushing both inputs and outputs seems to cause less problems
// than flushing nothing, so let's just set FZ and AH and roll with whatever behavior we get.
const u32 flush_to_zero_bits = (non_ieee_mode ? FZ | AH : 0);
static bool afp_warning_shown = false;
if (!afp_warning_shown && !cpu_info.bAFP && non_ieee_mode)
{
afp_warning_shown = true;
WARN_LOG_FMT(POWERPC,
"Non-IEEE mode was requested, but host CPU is not known to support FEAT_AFP");
}
// lookup table for FPSCR.RN-to-FPCR.RMode translation
constexpr u32 rounding_mode_table[] = {
(0 << 22), // nearest
(3 << 22), // zero
(1 << 22), // +inf
(2 << 22), // -inf
};
constexpr u32 rounding_mode_mask = 3 << 22;
const u32 rounding_mode_bits = rounding_mode_table[rounding_mode];
const u64 base = default_fpcr & ~(flush_to_zero_mask | rounding_mode_mask);
SetFPCR(base | rounding_mode_bits | flush_to_zero_bits);
}
void SaveSIMDState()
{
saved_fpcr = GetFPCR();
}
void LoadSIMDState()
{
SetFPCR(saved_fpcr);
}
void LoadDefaultSIMDState()
{
SetFPCR(default_fpcr);
}
} // namespace FPURoundMode