Improve accuracy of FPU emulation slightly - still no F-Zero improvements :(

Generic code cleanup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3458 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard
2009-06-15 21:10:11 +00:00
parent dcae21f692
commit 4dba267775
16 changed files with 355 additions and 383 deletions

View File

@ -32,7 +32,7 @@ static const u32 default_sse_state = _mm_getcsr();
namespace MathUtil
{
int ClassifyFP(double dvalue)
int ClassifyDouble(double dvalue)
{
// TODO: Optimize the below to be as fast as possible.
IntDouble value;
@ -79,6 +79,53 @@ int ClassifyFP(double dvalue)
return 0x4;
}
int ClassifyFloat(float fvalue)
{
// TODO: Optimize the below to be as fast as possible.
IntFloat value;
value.f = fvalue;
// 5 bits (C, <, >, =, ?)
// easy cases first
if (value.i == 0) {
// positive zero
return 0x2;
} else if (value.i == 0x80000000) {
// negative zero
return 0x12;
} else if (value.i == 0x7F800000) {
// positive inf
return 0x5;
} else if (value.i == 0xFF800000) {
// negative inf
return 0x9;
} else {
// OK let's dissect this thing.
int sign = value.i >> 31;
int exp = (int)((value.i >> 23) & 0xFF);
if (exp >= 1 && exp <= 254) {
// Nice normalized number.
if (sign) {
return 0x8; // negative
} else {
return 0x4; // positive
}
}
u64 mantissa = value.i & 0x007FFFFF;
if (exp == 0 && mantissa) {
// Denormalized number.
if (sign) {
return 0x18;
} else {
return 0x14;
}
} else if (exp == 0xFF && mantissa /* && mantissa_top*/) {
return 0x11; // Quiet NAN
}
}
return 0x4;
}
} // namespace
void LoadDefaultSSEState()

View File

@ -98,12 +98,9 @@ enum PPCFpClass
// Uses PowerPC conventions for the return value, so it can be easily
// used directly in CPU emulation.
int ClassifyFP(double dvalue);
// TODO: More efficient float version.
inline int ClassifyFP(float fvalue) {
ClassifyFP((double)fvalue);
}
int ClassifyDouble(double dvalue);
// More efficient float version.
int ClassifyFloat(float fvalue);
} // namespace MathUtil

View File

@ -745,16 +745,16 @@ void Callback_VideoCopiedToXFB()
*/
/**/
if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0/3.0 + 1.0/2.0)/2)) FPS_To_VPS_Rate = 1.0/3.0;
else if (FPS_To_VPS_Rate > ((1.0/3.0 + 1.0/2.0)/2) && FPS_To_VPS_Rate < ((1.0/2.0 + 1.0/1.0)/2)) FPS_To_VPS_Rate = 1.0/2.0;
if (FPS_To_VPS_Rate > 0 && FPS_To_VPS_Rate < ((1.0f/3.0f + 1.0f/2.0f)/2)) FPS_To_VPS_Rate = 1.0f/3.0f;
else if (FPS_To_VPS_Rate > ((1.0f/3.0f + 1.0f/2.0f)/2) && FPS_To_VPS_Rate < ((1.0f/2.0f + 1.0f/1.0f)/2)) FPS_To_VPS_Rate = 1.0/2.0;
else FPS_To_VPS_Rate = 1.0;
// PAL patch adjustment
if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2;
if (VideoInterface::TargetRefreshRate == 50) FPS_To_VPS_Rate = FPS_To_VPS_Rate * 1.2f;
float TargetFPS = FPS_To_VPS_Rate * (float)VideoInterface::TargetRefreshRate;
float FPSPercentage = (FPS / TargetFPS) * 100.0;
float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0;
float FPSPercentage = (FPS / TargetFPS) * 100.0f;
float VPSPercentage = (VideoInterface::ActualRefreshRate / (float)VideoInterface::TargetRefreshRate) * 100.0f;
// Settings are shown the same for both extended and summary info
std::string SSettings = StringFromFormat(" | Core: %s %s",

View File

@ -338,7 +338,9 @@ static u32 LineCount = 0;
static u32 LinesPerField = 0;
static u64 LastTime = 0;
static u32 NextXFBRender = 0;
int TargetRefreshRate = 0, SyncTicksProgress = 0; float ActualRefreshRate = 0.0;
int TargetRefreshRate = 0;
s64 SyncTicksProgress = 0;
float ActualRefreshRate = 0.0;
void DoState(PointerWrap &p)
{
@ -1042,23 +1044,24 @@ void UpdateTiming()
// Run when: This is run 7200 times per second on full speed
void Update()
{
// Update the target refresh rate
TargetRefreshRate = (m_DisplayControlRegister.FMT == 0 || m_DisplayControlRegister.FMT == 2)
? 60 : 50;
// Calculate actual refresh rate
static u64 LastTick = 0;
static int UpdateCheck = timeGetTime() + 1000, TickProgress = 0;
static s64 UpdateCheck = timeGetTime() + 1000, TickProgress = 0;
if (UpdateCheck < (int)timeGetTime())
{
UpdateCheck = timeGetTime() + 1000;
TickProgress = CoreTiming::GetTicks() - LastTick;
// Calculated CPU-GPU synced ticks for the dual core mode too
NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str());
// NOTICE_LOG(VIDEO, "Removed: %s Mhz", ThS(SyncTicksProgress / 1000000, false).c_str());
SyncTicksProgress += TickProgress;
// Multiplied by two because of the way TicksPerFrame is calculated (divided by 25 and 30
// rather than 50 and 60)
// TODO : Feed the FPS estimate into Iulius' framelimiter.
ActualRefreshRate = ((float)SyncTicksProgress / (float)TicksPerFrame) * 2.0;
LastTick = CoreTiming::GetTicks();
SyncTicksProgress = 0;

View File

@ -52,7 +52,11 @@ namespace VideoInterface
// Update and draw framebuffer(s)
void Update();
extern float ActualRefreshRate; extern int TargetRefreshRate, SyncTicksProgress;
// urgh, ugly externs.
extern float ActualRefreshRate;
extern int TargetRefreshRate;
extern s64 SyncTicksProgress;
// UpdateInterrupts: check if we have to generate a new VI Interrupt
void UpdateInterrupts();

View File

@ -311,7 +311,6 @@ namespace Interpreter
// other helper
u32 Helper_Mask(int mb, int me);
inline bool IsNAN(double _dValue);
extern _interpreterInstruction m_opTable[64];
extern _interpreterInstruction m_opTable4[1024];

View File

@ -34,36 +34,16 @@
#include "../../Core.h"
#include "Interpreter.h"
#include "MathUtil.h"
// SUPER MONKEY BALL IS BEING A ROYAL PAIN
// We are missing the caller of 800070ec
// F-ZERO IS BEING A ROYAL PAIN
// POSSIBLE APPROACHES:
// * Full SW FPU. Urgh.
// * Partial SW FPU, emulate just as much as necessary for monkey ball. Feasible but a lot of work.
// * HLE hacking. Figure out what all the evil functions really do and fake them. DONE (well, works okay-ish)
// * Partial SW FPU, emulate just as much as necessary for f-zero. Feasible, I guess.
// * HLE hacking. Figure out what all the evil functions really do and fake them.
// This worked well for Monkey Ball, not so much for F-Zero.
// Interesting places in Super Monkey Ball:
// 80036654: fctwixz stuff
// 80007e08:
// -98: Various entry points that loads various odd fp values into f1
// 800070b0: Estimate inverse square root.
// 800070ec: Examine f1. Reads a value out of locked cache into f2 (fixed address). Some cases causes us to call the above thing.
// If all goes well, jump to 70b0, which estimates the inverse square root.
// Then multiply the loaded variable with the original value of f1. Result should be the square root. (1 / sqrt(x)) * x = x / sqrt(x) = sqrt(x)
// 8000712c: Similar, but does not do the multiply at the end, just an frspx.
// 8000716c: Sort of similar, but has extra junk at the end.
//
//
// 800072a4 - nightmare of nightmares
// Fun stuff used:
// bso+
// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly (!!!!)
// crclr
// crset
// crxor
// fnabs
// Super Monkey Ball reads FPRF & friends after fmadds, fmuls, frspx
// WHY do the FR & FI flags affect it so much?
using namespace MathUtil;
namespace Interpreter
{
@ -71,112 +51,68 @@ namespace Interpreter
void UpdateFPSCR(UReg_FPSCR fp);
void UpdateSSEState();
// start of unit test - Dolphin needs more of these!
/*
void TestFPRF()
{
UpdateFPRF(1.0);
if (FPSCR.FPRF != 0x4)
PanicAlert("Error 1");
UpdateFPRF(-1.0);
if (FPSCR.FPRF != 0x8)
PanicAlert("Error 2");
PanicAlert("Test done");
}*/
// extremely rare
// Extremely rare - actually, never seen.
// Stub for the record (Rc=1) form of floating-point instructions.
// Nothing is written to CR1; the call only raises a PanicAlert so that any
// code reaching this path gets noticed.
void Helper_UpdateCR1(double _fValue)
{
    // A real implementation should only copy the exception flags over and
    // must not perform any comparison, so the value itself is unused.
    (void)_fValue;
    PanicAlert("CR1");
}
// Returns true when _dValue is a NaN. Relies on NaN being the only value
// that does not compare equal to itself.
inline bool IsNAN(double _dValue)
{
    const bool equalsItself = (_dValue == _dValue);
    return !equalsItself;
}
// Single-precision NaN test: a NaN is the only value unequal to itself.
inline bool _IsNAN(float x)
{
    // Bit-pattern alternative, kept for reference:
    //return ((*(u32*)&x) & 0x7f800000UL) == 0x7f800000UL && ((*(u32*)&x) & 0x007fffffUL);
    const bool equalsItself = (x == x);
    return !equalsItself;
}
void fcmpo(UGeckoInstruction _inst)
{
/*
float fa = static_cast<float>(rPS0(_inst.FA));
float fb = static_cast<float>(rPS0(_inst.FB));
// normalize
if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL;
if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL;
*/
// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
// the real problem should be fixed instead.
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
// normalize if conversion to float gives denormalized number
if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
riPS0(_inst.FA) &= 0x8000000000000000ULL;
if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
riPS0(_inst.FB) &= 0x8000000000000000ULL;
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
u32 compareResult;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
int compareResult;
if (IsNAN(fa) || IsNAN(fb))
{
FPSCR.FX = 1;
compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb))
{
FPSCR.VXSNAN = 1;
if (!FPSCR.FEX || IsQNAN(fa) || IsQNAN(fb))
FPSCR.VXVC = 1;
}
}
else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult);
/* missing part
if ((frA) is an SNaN or (frB) is an SNaN )
then VXSNAN <- 1
if VE = 0
then VXVC <- 1
else if ((frA) is a QNaN or (frB) is a QNaN )
then VXVC <- 1 */
}
void fcmpu(UGeckoInstruction _inst)
{
// Use FlushToZeroAsFloat() to fix a couple of games - but seriously,
// the real problem should be fixed instead.
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
/*
float fa = static_cast<float>(rPS0(_inst.FA));
float fb = static_cast<float>(rPS0(_inst.FB));
// normalize
if (((*(u32*)&fa) & 0x7f800000UL) == 0) (*(u32*)&fa) &= 0x80000000UL;
if (((*(u32*)&fb) & 0x7f800000UL) == 0) (*(u32*)&fb) &= 0x80000000UL;
*/
// normalize if conversion to float gives denormalized number
if ((riPS0(_inst.FA) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
riPS0(_inst.FA) &= 0x8000000000000000ULL;
if ((riPS0(_inst.FB) & 0x7ff0000000000000ULL) < 0x3800000000000000ULL)
riPS0(_inst.FB) &= 0x8000000000000000ULL;
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
u32 compareResult;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
int compareResult;
if (IsNAN(fa) || IsNAN(fb))
{
FPSCR.FX = 1;
compareResult = 1;
if (IsSNAN(fa) || IsSNAN(fb))
{
FPSCR.VXSNAN = 1;
}
}
else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult);
/* missing part
if ((frA) is an SNaN or (frB) is an SNaN)
then VXSNAN <- 1 */
}
// Apply current rounding mode
void fctiwx(UGeckoInstruction _inst)
{
//UpdateSSEState();
const double b = rPS0(_inst.FB);
u32 value;
if (b > (double)0x7fffffff)
@ -215,7 +151,6 @@ largest representable int on PowerPC. */
// Always round toward zero
void fctiwzx(UGeckoInstruction _inst)
{
//UpdateSSEState();
const double b = rPS0(_inst.FB);
u32 value;
if (b > (double)0x7fffffff)
@ -282,76 +217,14 @@ void fselx(UGeckoInstruction _inst)
// !!! warning !!!
// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
// PS1 is said to be undefined
// Super Monkey Ball is using this to do wacky tricks so we need 100% correct emulation.
void frspx(UGeckoInstruction _inst) // round to single
{
if (true || FPSCR.RN != 0)
{
// Not used in Super Monkey Ball
// UpdateSSEState();
double b = rPS0(_inst.FB);
double rounded = (double)(float)b;
//FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly.
if (Core::g_CoreStartupParameter.bEnableFPRF)
UpdateFPRF(rounded);
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
return;
// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
}
// OK, let's try it in 100% software! Not yet working right.
union {
double d;
u64 i;
} in, out;
in.d = rPS0(_inst.FB);
out = in;
int sign = (int)(in.i >> 63);
int exp = (int)((in.i >> 52) & 0x7FF);
u64 mantissa = in.i & 0x000FFFFFFFFFFFFFULL;
u64 mantissa_single = mantissa & 0x000FFFFFE0000000ULL;
u64 leftover_single = mantissa & 0x000000001FFFFFFFULL;
// OK. First make sure that we have a "normal" number.
if (exp >= 1 && exp <= 2046) {
// OK. Check for overflow. TODO
FPSCR.FI = leftover_single != 0; // Inexact
if (leftover_single >= 0x10000000ULL) {
//PanicAlert("rounding up");
FPSCR.FR = 1;
mantissa_single += 0x20000000;
if (mantissa_single & 0x0010000000000000ULL) {
// PanicAlert("renormalizing");
mantissa_single >>= 1;
exp += 1;
// if (exp > 2046) { OVERFLOW }
}
}
out.i = ((u64)sign << 63) | ((u64)exp << 52) | mantissa_single;
} else {
if (!exp && !mantissa) {
// Positive or negative Zero. All is well.
FPSCR.FI = 0;
FPSCR.FR = 0;
} else if (exp == 0 && mantissa) {
// Denormalized number.
PanicAlert("denorm");
} else if (exp == 2047 && !mantissa) {
// Infinite.
//PanicAlert("infinite");
FPSCR.FI = 1;
FPSCR.FR = 1;
// FPSCR.OX = 1;
} else {
//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
}
}
UpdateFPRF(out.d);
rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
double b = rPS0(_inst.FB);
double rounded = (double)(float)b;
//FPSCR.FI = b != rounded;
UpdateFPRF(rounded);
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
return;
}
@ -394,11 +267,13 @@ void fmaddsx(UGeckoInstruction _inst)
// fadd: FD.ps0 = FA.ps0 + FB.ps0 in double precision, then reclassifies the
// stored result via UpdateFPRF.
void faddx(UGeckoInstruction _inst)
{
    const double lhs = rPS0(_inst.FA);
    const double rhs = rPS0(_inst.FB);
    rPS0(_inst.FD) = lhs + rhs;

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// fadds: adds FA.ps0 and FB.ps0, rounds the sum to single precision and
// writes it to both slots of FD.
void faddsx(UGeckoInstruction _inst)
{
    const double lhs = rPS0(_inst.FA);
    const double rhs = rPS0(_inst.FB);
    rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(lhs + rhs);

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -407,51 +282,79 @@ void fdivx(UGeckoInstruction _inst)
{
double a = rPS0(_inst.FA);
double b = rPS0(_inst.FB);
if (a == 0.0f && b == 0.0f)
rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0; // NAN?
else
rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
if (fabs(rPS0(_inst.FB)) == 0.0) {
if (!FPSCR.ZX)
FPSCR.FX = 1;
FPSCR.ZX = 1;
FPSCR.XX = 1;
}
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fdivsx(UGeckoInstruction _inst)
{
float a = rPS0(_inst.FA);
float b = rPS0(_inst.FB);
if (a != a || b != b)
rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0; // NAN?
else
rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
rPS0(_inst.FD) = a / b;
if (b == 0.0) {
if (!FPSCR.ZX)
FPSCR.FX = 1;
FPSCR.ZX = 1;
FPSCR.XX = 1;
}
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fresx(UGeckoInstruction _inst)
void fdivsx(UGeckoInstruction _inst)
{
double b = rPS0(_inst.FB);
rPS0(_inst.FD) = rPS1(_inst.FD) = 1.0 / b;
if (fabs(rPS0(_inst.FB)) == 0.0) {
float a = (float)rPS0(_inst.FA);
float b = (float)rPS0(_inst.FB);
rPS0(_inst.FD) = rPS1(_inst.FD) = a / b;
if (b == 0.0)
{
if (!FPSCR.ZX)
FPSCR.FX = 1;
FPSCR.ZX = 1;
FPSCR.XX = 1;
}
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
// fres: reciprocal of FB.ps0, evaluated with an ordinary single-precision
// division and written to both slots of FD. Single precision only.
void fresx(UGeckoInstruction _inst)
{
    const float divisor = static_cast<float>(rPS0(_inst.FB));
    const float reciprocal = 1.0f / divisor;
    rPS0(_inst.FD) = rPS1(_inst.FD) = reciprocal;

    // Division by zero: FX is raised only if ZX was not already set.
    if (divisor == 0.0)
    {
        if (!FPSCR.ZX)
            FPSCR.FX = 1;
        FPSCR.ZX = 1;
        FPSCR.XX = 1;
    }

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// frsqrte: FD.ps0 = 1 / sqrt(FB.ps0), evaluated in single precision.
// A zero operand sets ZX, a negative operand sets VXSQRT.
void frsqrtex(UGeckoInstruction _inst)
{
    const float operand = static_cast<float>(rPS0(_inst.FB));

    // The two conditions are mutually exclusive.
    if (operand == 0)
    {
        FPSCR.ZX = 1;
    }
    else if (operand < 0.0)
    {
        FPSCR.VXSQRT = 1;
    }

    rPS0(_inst.FD) = 1.0f / sqrtf(operand);
    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// fsqrt: FD.ps0 = sqrt(FB.ps0) in double precision; a negative operand sets
// VXSQRT before the root is taken.
void fsqrtx(UGeckoInstruction _inst)
{
    // GEKKO is not supposed to support this instruction.
    // PanicAlert("fsqrtx");
    const double operand = rPS0(_inst.FB);
    if (operand < 0.0)
        FPSCR.VXSQRT = 1;

    rPS0(_inst.FD) = sqrt(operand);
    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// fmsub: FD.ps0 = (FA.ps0 * FC.ps0) - FB.ps0 in double precision.
void fmsubx(UGeckoInstruction _inst)
{
    const double a = rPS0(_inst.FA);
    const double c = rPS0(_inst.FC);
    const double b = rPS0(_inst.FB);
    rPS0(_inst.FD) = (a * c) - b;

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -459,6 +362,7 @@ void fmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -466,12 +370,14 @@ void fmsubsx(UGeckoInstruction _inst)
// fnmadd: FD.ps0 = -((FA.ps0 * FC.ps0) + FB.ps0) in double precision.
void fnmaddx(UGeckoInstruction _inst)
{
    const double a = rPS0(_inst.FA);
    const double c = rPS0(_inst.FC);
    const double b = rPS0(_inst.FB);
    rPS0(_inst.FD) = -((a * c) + b);

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// fnmadds: -((FA.ps0 * FC.ps0) + FB.ps0), rounded to single precision and
// written to both slots of FD.
void fnmaddsx(UGeckoInstruction _inst)
{
    const double a = rPS0(_inst.FA);
    const double c = rPS0(_inst.FC);
    const double b = rPS0(_inst.FB);
    rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(-((a * c) + b));

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -479,12 +385,14 @@ void fnmaddsx(UGeckoInstruction _inst)
// fnmsub: FD.ps0 = -((FA.ps0 * FC.ps0) - FB.ps0) in double precision.
void fnmsubx(UGeckoInstruction _inst)
{
    const double a = rPS0(_inst.FA);
    const double c = rPS0(_inst.FC);
    const double b = rPS0(_inst.FB);
    rPS0(_inst.FD) = -((a * c) - b);

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// fnmsubs: -((FA.ps0 * FC.ps0) - FB.ps0), rounded to single precision and
// written to both slots of FD.
void fnmsubsx(UGeckoInstruction _inst)
{
    const double a = rPS0(_inst.FA);
    const double c = rPS0(_inst.FC);
    const double b = rPS0(_inst.FB);
    rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(-((a * c) - b));

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -492,32 +400,13 @@ void fnmsubsx(UGeckoInstruction _inst)
// fsub: FD.ps0 = FA.ps0 - FB.ps0 in double precision, then reclassifies the
// stored result via UpdateFPRF.
void fsubx(UGeckoInstruction _inst)
{
    const double minuend = rPS0(_inst.FA);
    const double subtrahend = rPS0(_inst.FB);
    rPS0(_inst.FD) = minuend - subtrahend;

    UpdateFPRF(rPS0(_inst.FD));
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void frsqrtex(UGeckoInstruction _inst)
{
double b = rPS0(_inst.FB);
if (b <= 0.0)
rPS0(_inst.FD) = 0.0;
else
rPS0(_inst.FD) = 1.0f / (sqrt(b));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsqrtx(UGeckoInstruction _inst)
{
double b = rPS0(_inst.FB);
if (b < 0.0)
{
FPSCR.VXSQRT = 1;
}
rPS0(_inst.FD) = sqrt(b);
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}

View File

@ -493,7 +493,7 @@ void divwux(UGeckoInstruction _inst)
u32 a = m_GPR[_inst.RA];
u32 b = m_GPR[_inst.RB];
if (b == 0 || (a == 0x80000000 && b == 0xFFFFFFFF))
if (b == 0) // || (a == 0x80000000 && b == 0xFFFFFFFF))
{
if (_inst.OE)
PanicAlert("OE: divwux");

View File

@ -15,6 +15,9 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "MathUtil.h"
#include "../../HW/Memmap.h"
#include "../../HW/CommandProcessor.h"
#include "../../HW/PixelEngine.h"
@ -92,16 +95,18 @@ void lfdx(UGeckoInstruction _inst)
void lfs(UGeckoInstruction _inst)
{
u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
rPS0(_inst.FD) = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD);
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
}
void lfsu(UGeckoInstruction _inst)
{
u32 uAddress = Helper_Get_EA_U(_inst);
u32 uTemp = Memory::Read_U32(uAddress);
rPS0(_inst.FD) = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD);
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
m_GPR[_inst.RA] = uAddress;
}
@ -109,16 +114,18 @@ void lfsux(UGeckoInstruction _inst)
{
u32 uAddress = Helper_Get_EA_UX(_inst);
u32 uTemp = Memory::Read_U32(uAddress);
rPS0(_inst.FD) = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD);
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
m_GPR[_inst.RA] = uAddress;
}
void lfsx(UGeckoInstruction _inst)
{
u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst));
rPS0(_inst.FD) = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD);
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
}
void lha(UGeckoInstruction _inst)
@ -227,7 +234,8 @@ void stfdu(UGeckoInstruction _inst)
void stfs(UGeckoInstruction _inst)
{
float fTemp = (float)rPS0(_inst.FS);
double value = rPS0(_inst.FS);
float fTemp = (float)value;
Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst));
}
@ -453,27 +461,20 @@ void stfiwx(UGeckoInstruction _inst)
Memory::Write_U32((u32)riPS0(_inst.FS), uAddress);
}
// __________________________________________________________________________________________________
// stfsux
//
// no paired ??
//
void stfsux(UGeckoInstruction _inst)
{
float fTemp = (float)rPS0(_inst.FS);
double value = rPS0(_inst.FS);
float fTemp = (float)value;
u32 uAddress = Helper_Get_EA_UX(_inst);
Memory::Write_U32(*(u32*)&fTemp, uAddress);
m_GPR[_inst.RA] = uAddress;
}
// __________________________________________________________________________________________________
// stfsx
//
// no paired ??
//
void stfsx(UGeckoInstruction _inst)
{
float fTemp = (float)rPS0(_inst.FS);
double value = rPS0(_inst.FS);
float fTemp = (float)value;
Memory::Write_U32(*(u32 *)&fTemp, Helper_Get_EA_X(_inst));
}

View File

@ -16,41 +16,52 @@
// http://code.google.com/p/dolphin-emu/
#include <math.h>
#include "Common.h"
#include "MathUtil.h"
#include "Interpreter.h"
#include "../../HW/Memmap.h"
using namespace MathUtil;
namespace Interpreter
{
// These "binary instructions" do not alter FPSCR.
void ps_sel(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = static_cast<float>((rPS0(_inst.FA) >= -0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB));
rPS1(_inst.FD) = static_cast<float>((rPS1(_inst.FA) >= -0.0) ? rPS1(_inst.FC) : rPS1(_inst.FB));
rPS0(_inst.FD) = !IsNAN(rPS0(_inst.FA)) && rPS0(_inst.FA) >= -0.0 ?
rPS0(_inst.FC) : rPS0(_inst.FB);
rPS1(_inst.FD) = !IsNAN(rPS1(_inst.FA)) && rPS1(_inst.FA) >= -0.0 ?
rPS1(_inst.FC) : rPS1(_inst.FB);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_neg: copies both slots of FB into FD with the sign bit (bit 63 of the
// raw 64-bit pattern) inverted.
void ps_neg(UGeckoInstruction _inst)
{
    const unsigned long long signBit = 1ULL << 63;
    riPS0(_inst.FD) = riPS0(_inst.FB) ^ signBit;
    riPS1(_inst.FD) = riPS1(_inst.FB) ^ signBit;
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_mr: plain register move, both slots of FB are copied to FD unchanged.
void ps_mr(UGeckoInstruction _inst)
{
    const double slot0 = rPS0(_inst.FB);
    rPS0(_inst.FD) = slot0;
    const double slot1 = rPS1(_inst.FB);
    rPS1(_inst.FD) = slot1;
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_nabs: copies both slots of FB into FD with the sign bit (bit 63 of the
// raw 64-bit pattern) forced on.
void ps_nabs(UGeckoInstruction _inst)
{
    const unsigned long long signBit = 1ULL << 63;
    riPS0(_inst.FD) = riPS0(_inst.FB) | signBit;
    riPS1(_inst.FD) = riPS1(_inst.FB) | signBit;
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_abs: copies both slots of FB into FD with the sign bit (bit 63 of the
// raw 64-bit pattern) cleared.
void ps_abs(UGeckoInstruction _inst)
{
    const unsigned long long keepAllButSign = ~(1ULL << 63);
    riPS0(_inst.FD) = riPS0(_inst.FB) & keepAllButSign;
    riPS1(_inst.FD) = riPS1(_inst.FB) & keepAllButSign;
    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// These are just moves, double is OK.
@ -60,6 +71,7 @@ void ps_merge00(UGeckoInstruction _inst)
double p1 = rPS0(_inst.FB);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_merge01(UGeckoInstruction _inst)
@ -68,6 +80,7 @@ void ps_merge01(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FB);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_merge10(UGeckoInstruction _inst)
@ -76,6 +89,7 @@ void ps_merge10(UGeckoInstruction _inst)
double p1 = rPS0(_inst.FB);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_merge11(UGeckoInstruction _inst)
@ -84,6 +98,7 @@ void ps_merge11(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FB);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -97,63 +112,75 @@ void ps_div(UGeckoInstruction _inst)
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
}
void ps_sub(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
}
void ps_add(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_res: per-slot reciprocal of FB, evaluated with a single-precision
// division. Slot 0 is written before slot 1 is read.
void ps_res(UGeckoInstruction _inst)
{
    const float divisor0 = static_cast<float>(rPS0(_inst.FB));
    rPS0(_inst.FD) = 1.0f / divisor0;

    const float divisor1 = static_cast<float>(rPS1(_inst.FB));
    rPS1(_inst.FD) = 1.0f / divisor1;

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_rsqrte: per-slot 1 / sqrt(FB), evaluated in single precision.
void ps_rsqrte(UGeckoInstruction _inst)
{
    // PanicAlert("ps_rsqrte");
    const float operand0 = static_cast<float>(rPS0(_inst.FB));
    rPS0(_inst.FD) = 1.0f / sqrtf(operand0);

    const float operand1 = static_cast<float>(rPS1(_inst.FB));
    rPS1(_inst.FD) = 1.0f / sqrtf(operand1);

    // Only slot 0 is examined for ZX. FB.ps0 is re-read here, after the
    // writes above, so the check stays in this position (FD may be FB).
    if (fabs(rPS0(_inst.FB)) == 0.0)
    {
        FPSCR.ZX = 1;
    }

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_sub: per-slot FA - FB, each difference rounded to single precision.
void ps_sub(UGeckoInstruction _inst)
{
    const double a0 = rPS0(_inst.FA);
    const double b0 = rPS0(_inst.FB);
    rPS0(_inst.FD) = static_cast<float>(a0 - b0);

    const double a1 = rPS1(_inst.FA);
    const double b1 = rPS1(_inst.FB);
    rPS1(_inst.FD) = static_cast<float>(a1 - b1);

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_add: per-slot FA + FB, each sum rounded to single precision.
void ps_add(UGeckoInstruction _inst)
{
    const double a0 = rPS0(_inst.FA);
    const double b0 = rPS0(_inst.FB);
    rPS0(_inst.FD) = static_cast<float>(a0 + b0);

    const double a1 = rPS1(_inst.FA);
    const double b1 = rPS1(_inst.FB);
    rPS1(_inst.FD) = static_cast<float>(a1 + b1);

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_mul: per-slot FA * FC, each product rounded to single precision.
void ps_mul(UGeckoInstruction _inst)
{
    const double a0 = rPS0(_inst.FA);
    const double c0 = rPS0(_inst.FC);
    rPS0(_inst.FD) = static_cast<float>(a0 * c0);

    const double a1 = rPS1(_inst.FA);
    const double c1 = rPS1(_inst.FC);
    rPS1(_inst.FD) = static_cast<float>(a1 * c1);

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_rsqrte(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS0(_inst.FB)));
rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
}
// ps_msub: per-slot (FA * FC) - FB, each result rounded to single precision.
void ps_msub(UGeckoInstruction _inst)
{
    const double a0 = rPS0(_inst.FA);
    const double c0 = rPS0(_inst.FC);
    const double b0 = rPS0(_inst.FB);
    rPS0(_inst.FD) = static_cast<float>((a0 * c0) - b0);

    const double a1 = rPS1(_inst.FA);
    const double c1 = rPS1(_inst.FC);
    const double b1 = rPS1(_inst.FB);
    rPS1(_inst.FD) = static_cast<float>((a1 * c1) - b1);

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_madd: per-slot (FA * FC) + FB, each result rounded to single precision.
void ps_madd(UGeckoInstruction _inst)
{
    const double a0 = rPS0(_inst.FA);
    const double c0 = rPS0(_inst.FC);
    const double b0 = rPS0(_inst.FB);
    rPS0(_inst.FD) = static_cast<float>((a0 * c0) + b0);

    const double a1 = rPS1(_inst.FA);
    const double c1 = rPS1(_inst.FC);
    const double b1 = rPS1(_inst.FB);
    rPS1(_inst.FD) = static_cast<float>((a1 * c1) + b1);

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_nmsub: per-slot -(FA * FC - FB), each result rounded to single precision.
void ps_nmsub(UGeckoInstruction _inst)
{
    const double a0 = rPS0(_inst.FA);
    const double c0 = rPS0(_inst.FC);
    const double b0 = rPS0(_inst.FB);
    rPS0(_inst.FD) = static_cast<float>(-(a0 * c0 - b0));

    const double a1 = rPS1(_inst.FA);
    const double c1 = rPS1(_inst.FC);
    const double b1 = rPS1(_inst.FB);
    rPS1(_inst.FD) = static_cast<float>(-(a1 * c1 - b1));

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
// ps_nmadd: per-slot -(FA * FC + FB), each result rounded to single precision.
void ps_nmadd(UGeckoInstruction _inst)
{
    const double a0 = rPS0(_inst.FA);
    const double c0 = rPS0(_inst.FC);
    const double b0 = rPS0(_inst.FB);
    rPS0(_inst.FD) = static_cast<float>(-(a0 * c0 + b0));

    const double a1 = rPS1(_inst.FA);
    const double c1 = rPS1(_inst.FC);
    const double b1 = rPS1(_inst.FB);
    rPS1(_inst.FD) = static_cast<float>(-(a1 * c1 + b1));

    if (_inst.Rc)
        Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_sum0(UGeckoInstruction _inst)
@ -162,6 +189,7 @@ void ps_sum0(UGeckoInstruction _inst)
double p1 = (float)(rPS1(_inst.FC));
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_sum1(UGeckoInstruction _inst)
@ -170,6 +198,7 @@ void ps_sum1(UGeckoInstruction _inst)
double p1 = rPS0(_inst.FA) + rPS1(_inst.FB);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_muls0(UGeckoInstruction _inst)
@ -178,6 +207,7 @@ void ps_muls0(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FA) * rPS0(_inst.FC);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_muls1(UGeckoInstruction _inst)
@ -186,6 +216,7 @@ void ps_muls1(UGeckoInstruction _inst)
double p1 = rPS1(_inst.FA) * rPS1(_inst.FC);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_madds0(UGeckoInstruction _inst)
@ -194,6 +225,7 @@ void ps_madds0(UGeckoInstruction _inst)
double p1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_madds1(UGeckoInstruction _inst)
@ -202,6 +234,7 @@ void ps_madds1(UGeckoInstruction _inst)
double p1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
rPS0(_inst.FD) = p0;
rPS1(_inst.FD) = p1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_cmpu0(UGeckoInstruction _inst)
@ -209,10 +242,12 @@ void ps_cmpu0(UGeckoInstruction _inst)
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
int compareResult;
if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_cmpo0(UGeckoInstruction _inst)
@ -226,10 +261,12 @@ void ps_cmpu1(UGeckoInstruction _inst)
double fa = rPS1(_inst.FA);
double fb = rPS1(_inst.FB);
int compareResult;
if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
SetCRField(_inst.CRFD, compareResult);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void ps_cmpo1(UGeckoInstruction _inst)

View File

@ -35,6 +35,7 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
#include <xmmintrin.h>
#endif
#include "CPUDetect.h"
#include "../../CoreTiming.h"
#include "../../HW/Memmap.h"
#include "../../HW/GPFifo.h"
@ -60,37 +61,11 @@ mffsx: 80036650 (huh?)
namespace Interpreter
{
void UpdateSSEState()
{
u32 csr = _mm_getcsr();
const int ssetable[4] =
{
0,
3,
2,
1,
};
csr = csr & 0x9FFF;
csr |= ssetable[FPSCR.RN] << 13;
const u32 MASKS = 0x1F80; // mask away the interrupts.
const u32 DAZ = 0x40;
const u32 FTZ = 0x8000;
// Also handle denormals as zero (FZ + DAZ)
csr &= ~0x8020;
// SETTING FTZ+DAZ KILLS BEYOND GOOD AND EVIL
//if (daz)
// csr |= 0x20; // Only set DAZ //0x8020;
_mm_setcsr(csr);
}
void RestoreSSEState()
{
// A reasonable default
_mm_setcsr(0x1fa0);
}
void UpdateFPSCR(UReg_FPSCR fp)
void FPSCRtoFPUSettings(UReg_FPSCR fp)
{
// Set FPU rounding mode to mimic the PowerPC's
#ifdef _M_IX86
@ -120,12 +95,28 @@ void UpdateFPSCR(UReg_FPSCR fp)
#endif
if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
{
// PanicAlert("FPSCR - exceptions enabled. Please report.");
//PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
// fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
// Pokemon Colosseum does this. Gah.
}
// Also corresponding SSE rounding mode setting
UpdateSSEState();
static const u32 ssetable[4] =
{
(0 << 13) | MASKS,
(3 << 13) | MASKS,
(2 << 13) | MASKS,
(1 << 13) | MASKS,
};
u32 csr = ssetable[FPSCR.RN];
if (FPSCR.NI)
{
// Either one of these two breaks Beyond Good & Evil.
// if (cpu_info.bSSSE3)
// csr |= DAZ;
// csr |= FTZ;
}
_mm_setcsr(csr);
}
void mcrfs(UGeckoInstruction _inst)
@ -158,25 +149,9 @@ void mcrfs(UGeckoInstruction _inst)
break;
}
SetCRField(_inst.CRFD, fpflags);
UpdateFPSCR(FPSCR);
FPSCRtoFPUSettings(FPSCR);
}
#define MXCSR_IE 1
#define MXCSR_DE 2 // denormal
#define MXCSR_ZE 4 // divide by zero, sticky
#define MXCSR_OE 8 // overflow
#define MXCSR_UE 16 // underflow
#define MXCSR_PE 32 // precision
#define MXCSR_DAZ 64
#define MXCSR_IM 128
#define MXCSR_DM 256
#define MXCSR_ZM 512
#define MXCSR_OM 1024
#define MXCSR_UM 2048
#define MXCSR_PM 4096
#define MXCSR_ROUND (16384|8192)
#define MXCSR_FLUSH 32768
void mffsx(UGeckoInstruction _inst)
{
// load from FPSCR
@ -190,14 +165,14 @@ void mffsx(UGeckoInstruction _inst)
void mtfsb0x(UGeckoInstruction _inst)
{
FPSCR.Hex &= (~(0x80000000 >> _inst.CRBD));
UpdateFPSCR(FPSCR);
FPSCRtoFPUSettings(FPSCR);
if (_inst.Rc) PanicAlert("mtfsb0x: inst_.Rc");
}
void mtfsb1x(UGeckoInstruction _inst)
{
FPSCR.Hex |= 0x80000000 >> _inst.CRBD;
UpdateFPSCR(FPSCR);
FPSCRtoFPUSettings(FPSCR);
if (_inst.Rc) PanicAlert("mtfsb1x: inst_.Rc");
}
@ -206,7 +181,7 @@ void mtfsfix(UGeckoInstruction _inst)
u32 mask = (0xF0000000 >> (4 * _inst.CRFD));
u32 imm = (_inst.hex << 16) & 0xF0000000;
FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> (4 * _inst.CRFD));
UpdateFPSCR(FPSCR);
FPSCRtoFPUSettings(FPSCR);
if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc");
}
@ -214,13 +189,14 @@ void mtfsfx(UGeckoInstruction _inst)
{
u32 fm = _inst.FM;
u32 m = 0;
for (int i = 0; i < 8; i++) { //7?? todo check
for (int i = 0; i < 8; i++) //7?? todo check
{
if (fm & (1 << i))
m |= (0xF << (i*4));
m |= (0xF << (i * 4));
}
FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
UpdateFPSCR(FPSCR);
FPSCRtoFPUSettings(FPSCR);
if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc");
}

View File

@ -363,7 +363,7 @@ void OnIdleIL()
void UpdateFPRF(double dvalue)
{
FPSCR.FPRF = MathUtil::ClassifyFP(dvalue);
FPSCR.FPRF = MathUtil::ClassifyDouble(dvalue);
//if (FPSCR.FPRF == 0x11)
// PanicAlert("QNAN alert");
}