Lots of FP hacking for little gain; Super Monkey Ball is only slightly more sane (wow, it rhymed). Temporary no-speed-limit hack: hold TAB.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@279 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-23 09:20:36 +00:00
parent 0becaa3223
commit f82bf2ae9a
25 changed files with 1107 additions and 861 deletions

View File

@ -2075,7 +2075,7 @@ namespace PPCDisasm
break;
case 20:
fdabc(dp,in,"sqrte",2,0);
fdabc(dp,in,"rsqrte",2,0);
break;
case 24:

View File

@ -12,6 +12,7 @@
#ifndef _WX_LISTCTRL_H_
#define _WX_LISTCTRL_H_
#include "wx/dcbuffer.h"
#include "wx/textctrl.h"
class WXDLLIMPEXP_FWD_CORE wxImageList;

View File

@ -50,6 +50,7 @@
BasicRuntimeChecks="3"
RuntimeLibrary="1"
BufferSecurityCheck="true"
FloatingPointModel="0"
UsePrecompiledHeader="2"
AssemblerListingLocation="$(IntDir)\"
WarningLevel="3"
@ -118,6 +119,7 @@
BasicRuntimeChecks="3"
RuntimeLibrary="1"
BufferSecurityCheck="true"
FloatingPointModel="0"
UsePrecompiledHeader="2"
AssemblerListingLocation="$(IntDir)\"
WarningLevel="3"
@ -190,7 +192,7 @@
RuntimeLibrary="0"
BufferSecurityCheck="false"
EnableEnhancedInstructionSet="2"
FloatingPointModel="2"
FloatingPointModel="0"
UsePrecompiledHeader="2"
AssemblerListingLocation="$(IntDir)\"
WarningLevel="3"
@ -265,7 +267,7 @@
RuntimeLibrary="0"
BufferSecurityCheck="false"
EnableEnhancedInstructionSet="0"
FloatingPointModel="2"
FloatingPointModel="0"
UsePrecompiledHeader="2"
AssemblerListingLocation="$(IntDir)\"
WarningLevel="3"
@ -336,6 +338,7 @@
PreprocessorDefinitions="NDEBUG;_LIB;LOGGING;DEBUGFAST;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0"
RuntimeLibrary="0"
BufferSecurityCheck="false"
FloatingPointModel="0"
UsePrecompiledHeader="2"
AssemblerListingLocation="$(IntDir)\"
WarningLevel="3"
@ -407,6 +410,7 @@
PreprocessorDefinitions="NDEBUG;_LIB;LOGGING;DEBUGFAST;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0"
RuntimeLibrary="0"
BufferSecurityCheck="false"
FloatingPointModel="0"
UsePrecompiledHeader="2"
AssemblerListingLocation="$(IntDir)\"
WarningLevel="3"
@ -835,6 +839,14 @@
<File
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_FloatingPoint.cpp"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AssemblerOutput="4"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_Integer.cpp"
@ -844,6 +856,10 @@
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_LoadStore.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_LoadStorePaired.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_Paired.cpp"
>
@ -851,6 +867,14 @@
<File
RelativePath=".\Src\PowerPC\Interpreter\Interpreter_SystemRegisters.cpp"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AssemblerOutput="4"
/>
</FileConfiguration>
</File>
</Filter>
<Filter
@ -946,46 +970,6 @@
</File>
</Filter>
</Filter>
<Filter
Name="Boot"
>
<File
RelativePath=".\Src\Boot\Boot.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\Boot.h"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_DOL.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_DOL.h"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_ELF.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_ELF.h"
>
</File>
<File
RelativePath=".\Src\Boot\ElfReader.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\ElfReader.h"
>
</File>
<File
RelativePath=".\Src\Boot\ElfTypes.h"
>
</File>
</Filter>
<Filter
Name="Debugger"
>
@ -1025,6 +1009,46 @@
RelativePath=".\Src\Debugger\PPCDebugInterface.h"
>
</File>
<Filter
Name="Boot"
>
<File
RelativePath=".\Src\Boot\Boot.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\Boot.h"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_DOL.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_DOL.h"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_ELF.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\Boot_ELF.h"
>
</File>
<File
RelativePath=".\Src\Boot\ElfReader.cpp"
>
</File>
<File
RelativePath=".\Src\Boot\ElfReader.h"
>
</File>
<File
RelativePath=".\Src\Boot\ElfTypes.h"
>
</File>
</Filter>
</Filter>
<Filter
Name="IPC HLE"

View File

@ -16,6 +16,7 @@
// http://code.google.com/p/dolphin-emu/
#include <math.h>
#include <limits>
#ifdef _WIN32
#include <intrin.h>
@ -26,16 +27,87 @@
#include "../../Core.h"
#include "Interpreter.h"
// If you wanna have fun, read:
// 80007e08 in super monkey ball
// SUPER MONKEY BALL IS BEING A ROYAL PAIN
// We are missing the caller of 800070ec
// POSSIBLE APPROACHES:
// * Full SW FPU. Urgh.
// * Partial SW FPU, emulate just as much as necessary for monkey ball. Feasible but a lot of work.
// * HLE hacking. Figure out what all the evil functions really do and fake them.
// Interesting places in Super Monkey Ball:
// 80036654: fctwixz stuff
// 80007e08:
// -98: Various entry points that load various odd fp values into f1
// 800070b0: Estimate inverse square root.
// 800070ec: Examines f1. Reads a value out of locked cache into f2 (fixed address). Some cases cause us to call the routine above.
// If all goes well, jump to 70b0, which estimates the inverse square root.
// Then multiply the loaded variable with the original value of f1. Result should be the square root. (1 / sqrt(x)) * x = x / sqrt(x) = sqrt(x)
// 8000712c: Similar, but does not do the multiply at the end, just an frspx.
// 8000716c: Sort of similar, but has extra junk at the end.
//
//
// 800072a4 - nightmare of nightmares
// Fun stuff used:
// bso+
// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly
// mcrfs (ARGH pulls stuff out of .. FPSCR). it uses this to check the result of frsp mostly (!!!!)
// crclr
// crset
// crxor
// fnabs
//
// Super Monkey Ball reads FPRF & friends after fmadds, fmuls, frspx
// WHY do the FR & FI flags affect it so much?
void UpdateFPSCR(UReg_FPSCR fp);
void UpdateSSEState();
void UpdateFPRF(double value)
{
u64 ivalue = *((u64*)&value);
// 5 bits (C, <, >, =, ?)
// top: class descriptor
FPSCR.FPRF = 4;
// easy cases first
if (ivalue == 0) {
// positive zero
FPSCR.FPRF = 0x2;
} else if (ivalue == 0x8000000000000000ULL) {
// negative zero
FPSCR.FPRF = 0x12;
} else if (ivalue == 0x7FF0000000000000ULL) {
// positive inf
FPSCR.FPRF = 0x5;
} else if (ivalue == 0xFFF0000000000000ULL) {
// negative inf
FPSCR.FPRF = 0x9;
} else {
// OK let's dissect this thing.
int sign = ivalue >> 63;
int exp = (ivalue >> 52) & 0x7FF;
if (exp >= 1 && exp <= 2046) {
// Nice normalized number.
if (sign) {
FPSCR.FPRF = 0x8; // negative
} else {
FPSCR.FPRF = 0x4; // positive
}
return;
}
u64 mantissa = ivalue & 0x000FFFFFFFFFFFFFULL;
int mantissa_top = mantissa >> 51;
if (exp == 0 && mantissa) {
// Denormalized number.
if (sign) {
FPSCR.FPRF = 0x18;
} else {
FPSCR.FPRF = 0x14;
}
} else if (exp == 0x7FF && mantissa /* && mantissa_top*/) {
FPSCR.FPRF = 0x11; // Quiet NAN
return;
}
}
}
// extremely rare
void CInterpreter::Helper_UpdateCR1(double _fValue)
@ -48,110 +120,24 @@ void CInterpreter::Helper_UpdateCR1(double _fValue)
if (_fValue < 0.0)
FPSCR.FPRF |= 8;
SetCRField(1, (FPSCR.Hex & 0x0000F000) >> 12);
PanicAlert("CR1");
}
bool CInterpreter::IsNAN(double _dValue)
{
// not implemented
return _dValue != _dValue;
}
void CInterpreter::faddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fdivsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmaddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmulsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmaddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fresx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(1.0f / rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fsqrtsx(UGeckoInstruction _inst)
{
static bool bFirst = true;
if (bFirst)
PanicAlert("fsqrtsx - Instruction unimplemented");
bFirst = false;
}
void CInterpreter::fsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
//
//--- END OF SINGLE PRECISION ---
//
void CInterpreter::fabsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = fabs(rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fcmpo(UGeckoInstruction _inst)
{
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
u32 compareResult;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if(fa < fb) compareResult = 8;
else if(fa > fb) compareResult = 4;
else compareResult = 2;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult);
@ -171,10 +157,10 @@ void CInterpreter::fcmpu(UGeckoInstruction _inst)
double fb = rPS0(_inst.FB);
u32 compareResult;
if(IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if(fa < fb) compareResult = 8;
else if(fa > fb) compareResult = 4;
else compareResult = 2;
if (IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
FPSCR.FPRF = compareResult;
SetCRField(_inst.CRFD, compareResult);
@ -184,25 +170,36 @@ void CInterpreter::fcmpu(UGeckoInstruction _inst)
then VXSNAN ¬ 1 */
}
// Apply current rounding mode
void CInterpreter::fctiwx(UGeckoInstruction _inst)
{
double b = rPS0(_inst.FB);
UpdateSSEState();
const double b = rPS0(_inst.FB);
u32 value;
if (b > (double)0x7fffffff)
{
value = 0x7fffffff;
FPSCR.VXCVI = 1;
}
else if (b < -(double)0x7fffffff)
{
value = 0x80000000;
FPSCR.VXCVI = 1;
}
else
value = (u32)(s32)_mm_cvtsd_si32(_mm_set_sd(b)); // TODO(ector): enforce chop
{
value = (u32)(s32)_mm_cvtsd_si32(_mm_set_sd(b)); // obey current rounding mode
double d_value = (double)value;
bool inexact = (d_value != b);
// FPSCR.FI = inexact ? 1 : 0;
// FPSCR.XX |= FPSCR.FI;
// FPSCR.FR = fabs(d_value) > fabs(b);
}
//TODO: FR
//FPRF undefined
riPS0(_inst.FD) = (u64)value; // zero extend
/* TODO(ector):
FPSCR[FR] is set if the result is incremented when rounded.
FPSCR[FI] is set if the result is inexact.
*/
if (_inst.Rc)
Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -215,14 +212,29 @@ largest representable int on PowerPC. */
// Always round toward zero
void CInterpreter::fctiwzx(UGeckoInstruction _inst)
{
double b = rPS0(_inst.FB);
//UpdateFPSCR(FPSCR);
const double b = rPS0(_inst.FB);
u32 value;
if (b > (double)0x7fffffff)
{
value = 0x7fffffff;
FPSCR.VXCVI = 1;
}
else if (b < -(double)0x7fffffff)
{
value = 0x80000000;
FPSCR.VXCVI = 1;
}
else
value = (u32)(s32)_mm_cvttsd_si32(_mm_set_sd(b)); //TODO(ector): force round toward zero
{
value = (u32)(s32)_mm_cvttsd_si32(_mm_set_sd(b)); // truncate
double d_value = (double)value;
bool inexact = (d_value != b);
// FPSCR.FI = inexact ? 1 : 0;
// FPSCR.XX |= FPSCR.FI;
// FPSCR.FR = 1; //fabs(d_value) > fabs(b);
}
//FPRF undefined
riPS0(_inst.FD) = (u64)value;
if (_inst.Rc)
@ -232,109 +244,281 @@ void CInterpreter::fctiwzx(UGeckoInstruction _inst)
void CInterpreter::fmrx(UGeckoInstruction _inst)
{
riPS0(_inst.FD) = riPS0(_inst.FB);
// rPS1(_inst.FD) = rPS0(_inst.FD); // TODO: Should this be here?
// This is a binary instruction. Does not alter FPSCR
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fabsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = fabs(rPS0(_inst.FB));
// This is a binary instruction. Does not alter FPSCR
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnabsx(UGeckoInstruction _inst)
{
riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63);
// This is a binary instruction. Does not alter FPSCR
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnegx(UGeckoInstruction _inst)
{
riPS0(_inst.FD) = riPS0(_inst.FB) ^ (1ULL << 63);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
// !!! warning !!!
// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
// PS1 is said to be undefined
// TODO(ector): TODO(fires): does this apply to all of the below opcodes?
void CInterpreter::frspx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::faddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fdivx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmulx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) * rPS0(_inst.FC);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::frsqrtex(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = 1.0 / (sqrt(rPS0(_inst.FB)));
// This is a binary instruction. Does not alter FPSCR
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fselx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) >= 0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB);
rPS0(_inst.FD) = (rPS0(_inst.FA) >= -0.0) ? rPS0(_inst.FC) : rPS0(_inst.FB);
// This is a binary instruction. Does not alter FPSCR
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
// !!! warning !!!
// PS1 must be set to the value of PS0 or DragonballZ will be f**ked up
// PS1 is said to be undefined
// Super Monkey Ball is using this to do wacky tricks so we need 100% correct emulation.
// frspx: round the double in PS0(FB) to single precision and write the result
// to both PS0(FD) and PS1(FD).
//
// Two implementations live here:
//  1. A host-rounding path (the first `if` block), which is the one that runs.
//  2. An unfinished bit-level software path below it, which is currently
//     unreachable because the guard is `true || ...` and the block returns.
void CInterpreter::frspx(UGeckoInstruction _inst) // round to single
{
// NOTE: `true ||` forces this branch unconditionally; FPSCR.RN is not consulted.
if (true || FPSCR.RN != 0)
{
// Not used in Super Monkey Ball
UpdateSSEState();
double b = rPS0(_inst.FB);
// Round by converting through float and back.
double rounded = (double)(float)b;
// FI (inexact) is set when the round trip changed the value.
FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly.
// FR is hard-wired to 1 here rather than computed.
FPSCR.FR = 1; // WHY? fabs(rounded) > fabs(b);
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
// Early exit: this path neither calls UpdateFPRF nor honours _inst.Rc
// (no Helper_UpdateCR1 call).
return;
// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
}
// ---- Everything below is dead code while the guard above is always true. ----
// OK, let's try it in 100% software! Not yet working right.
union {
double d;
u64 i;
} in, out;
in.d = rPS0(_inst.FB);
out = in;
// Split the double into sign, 11-bit biased exponent and 52-bit mantissa.
int sign = in.i >> 63;
int exp = (in.i >> 52) & 0x7FF;
u64 mantissa = in.i & 0x000FFFFFFFFFFFFFULL;
// Upper 23 mantissa bits are kept; the lower 29 bits are what gets rounded away.
u64 mantissa_single = mantissa & 0x000FFFFFE0000000ULL;
u64 leftover_single = mantissa & 0x000000001FFFFFFFULL;
// OK. First make sure that we have a "normal" number.
if (exp >= 1 && exp <= 2046) {
// OK. Check for overflow. TODO
FPSCR.FI = leftover_single != 0; // Inexact
// Round up when the discarded bits are at least half of the last kept bit
// (0x10000000 is half of 0x20000000); ties always round up here.
if (leftover_single >= 0x10000000ULL) {
//PanicAlert("rounding up");
FPSCR.FR = 1;
mantissa_single += 0x20000000;
// A carry out of the 52-bit mantissa field means the exponent must be bumped.
if (mantissa_single & 0x0010000000000000) {
// PanicAlert("renormalizing");
mantissa_single >>= 1;
exp += 1;
// if (exp > 2046) { OVERFLOW }
}
}
// Reassemble; the exponent is not range-checked against the single format.
out.i = ((u64)sign << 63) | ((u64)exp << 52) | mantissa_single;
} else {
if (!exp && !mantissa) {
// Positive or negative Zero. All is well.
FPSCR.FI = 0;
FPSCR.FR = 0;
} else if (exp == 0 && mantissa) {
// Denormalized number.
PanicAlert("denorm");
} else if (exp == 2047 && !mantissa) {
// Infinite.
//PanicAlert("infinite");
FPSCR.FI = 1;
FPSCR.FR = 1;
// FPSCR.OX = 1;
} else {
// Remaining case: exp == 2047 with a non-zero mantissa (NaN); passed through unchanged.
//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
}
}
UpdateFPRF(out.d);
// Overrides whatever FR value was computed above.
FPSCR.FR = 1; // SUPER MONKEY BALL HACK
rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmulx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) * rPS0(_inst.FC);
FPSCR.FI = 0;
FPSCR.FR = 1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmulsx(UGeckoInstruction _inst)
{
double d_value = rPS0(_inst.FA) * rPS0(_inst.FC);
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(d_value);
FPSCR.FI = d_value != rPS0(_inst.FD);
FPSCR.FR = rand()&1;
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
FPSCR.FI = 0;
FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmaddsx(UGeckoInstruction _inst)
{
double d_value = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(d_value);
FPSCR.FI = d_value != rPS0(_inst.FD);
FPSCR.FR = 0;
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::faddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::faddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 1;
// FPSCR.Hex = (rand() ^ (rand() << 8) ^ (rand() << 16)) & ~(0x000000F8);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fdivx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fdivsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fresx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(1.0f / rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmaddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fnmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::frsqrtex(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = 1.0f / (sqrtf(rPS0(_inst.FB)));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fsqrtx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = sqrt(rPS0(_inst.FB));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void CInterpreter::fsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
rPS0(_inst.FD) = sqrt(rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}

View File

@ -72,13 +72,13 @@ void CInterpreter::lfdu(UGeckoInstruction _inst)
void CInterpreter::lfdux(UGeckoInstruction _inst)
{
u32 uAddress = Helper_Get_EA_UX(_inst);
riPS0(_inst.FS) = Memory::Read_U64(uAddress);
riPS0(_inst.FD) = Memory::Read_U64(uAddress);
m_GPR[_inst.RA] = uAddress;
}
void CInterpreter::lfdx(UGeckoInstruction _inst)
{
riPS0(_inst.FS) = Memory::Read_U64(Helper_Get_EA_X(_inst));
riPS0(_inst.FD) = Memory::Read_U64(Helper_Get_EA_X(_inst));
}
void CInterpreter::lfs(UGeckoInstruction _inst)
@ -149,7 +149,7 @@ void CInterpreter::lmw(UGeckoInstruction _inst)
return;
}
m_GPR[iReg] = TempReg;
m_GPR[iReg] = TempReg;
}
}

View File

@ -0,0 +1,337 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <math.h>
#include "Interpreter.h"
#include "../../HW/Memmap.h"
// Dequantize scale table, indexed by the raw scale value (Helper_Dequantize
// looks it up with _uScale). Entries 0..31 hold 2^-n; entries 32..63 hold
// 2^(64-n), i.e. 2^32 down to 2^1.
//
// All shifts are done on 1ULL. With a plain int, `1 << 31` lands on the sign
// bit and evaluates to a negative number, which made entries 31 and 33 negative.
const float m_dequantizeTable[] =
{
	1.0 / (1ULL <<  0),	1.0 / (1ULL <<  1),	1.0 / (1ULL <<  2),	1.0 / (1ULL <<  3),
	1.0 / (1ULL <<  4),	1.0 / (1ULL <<  5),	1.0 / (1ULL <<  6),	1.0 / (1ULL <<  7),
	1.0 / (1ULL <<  8),	1.0 / (1ULL <<  9),	1.0 / (1ULL << 10),	1.0 / (1ULL << 11),
	1.0 / (1ULL << 12),	1.0 / (1ULL << 13),	1.0 / (1ULL << 14),	1.0 / (1ULL << 15),
	1.0 / (1ULL << 16),	1.0 / (1ULL << 17),	1.0 / (1ULL << 18),	1.0 / (1ULL << 19),
	1.0 / (1ULL << 20),	1.0 / (1ULL << 21),	1.0 / (1ULL << 22),	1.0 / (1ULL << 23),
	1.0 / (1ULL << 24),	1.0 / (1ULL << 25),	1.0 / (1ULL << 26),	1.0 / (1ULL << 27),
	1.0 / (1ULL << 28),	1.0 / (1ULL << 29),	1.0 / (1ULL << 30),	1.0 / (1ULL << 31),
	(1ULL << 32),	(1ULL << 31),	(1ULL << 30),	(1ULL << 29),
	(1ULL << 28),	(1ULL << 27),	(1ULL << 26),	(1ULL << 25),
	(1ULL << 24),	(1ULL << 23),	(1ULL << 22),	(1ULL << 21),
	(1ULL << 20),	(1ULL << 19),	(1ULL << 18),	(1ULL << 17),
	(1ULL << 16),	(1ULL << 15),	(1ULL << 14),	(1ULL << 13),
	(1ULL << 12),	(1ULL << 11),	(1ULL << 10),	(1ULL <<  9),
	(1ULL <<  8),	(1ULL <<  7),	(1ULL <<  6),	(1ULL <<  5),
	(1ULL <<  4),	(1ULL <<  3),	(1ULL <<  2),	(1ULL <<  1),
};
// Quantize scale table, indexed by the raw scale value (Helper_Quantize looks
// it up with _uScale). It is the reciprocal of the dequantize table: entries
// 0..31 hold 2^n; entries 32..63 hold 2^-(64-n), i.e. 2^-32 up to 2^-1.
//
// All shifts are done on 1ULL. With a plain int, `1 << 31` lands on the sign
// bit and evaluates to a negative number, which made entries 31 and 33 negative.
const float m_quantizeTable[] =
{
	(1ULL <<  0),	(1ULL <<  1),	(1ULL <<  2),	(1ULL <<  3),
	(1ULL <<  4),	(1ULL <<  5),	(1ULL <<  6),	(1ULL <<  7),
	(1ULL <<  8),	(1ULL <<  9),	(1ULL << 10),	(1ULL << 11),
	(1ULL << 12),	(1ULL << 13),	(1ULL << 14),	(1ULL << 15),
	(1ULL << 16),	(1ULL << 17),	(1ULL << 18),	(1ULL << 19),
	(1ULL << 20),	(1ULL << 21),	(1ULL << 22),	(1ULL << 23),
	(1ULL << 24),	(1ULL << 25),	(1ULL << 26),	(1ULL << 27),
	(1ULL << 28),	(1ULL << 29),	(1ULL << 30),	(1ULL << 31),
	1.0 / (1ULL << 32),	1.0 / (1ULL << 31),	1.0 / (1ULL << 30),	1.0 / (1ULL << 29),
	1.0 / (1ULL << 28),	1.0 / (1ULL << 27),	1.0 / (1ULL << 26),	1.0 / (1ULL << 25),
	1.0 / (1ULL << 24),	1.0 / (1ULL << 23),	1.0 / (1ULL << 22),	1.0 / (1ULL << 21),
	1.0 / (1ULL << 20),	1.0 / (1ULL << 19),	1.0 / (1ULL << 18),	1.0 / (1ULL << 17),
	1.0 / (1ULL << 16),	1.0 / (1ULL << 15),	1.0 / (1ULL << 14),	1.0 / (1ULL << 13),
	1.0 / (1ULL << 12),	1.0 / (1ULL << 11),	1.0 / (1ULL << 10),	1.0 / (1ULL <<  9),
	1.0 / (1ULL <<  8),	1.0 / (1ULL <<  7),	1.0 / (1ULL <<  6),	1.0 / (1ULL <<  5),
	1.0 / (1ULL <<  4),	1.0 / (1ULL <<  3),	1.0 / (1ULL <<  2),	1.0 / (1ULL <<  1),
};
// Limits `a` to the range [bottom, top]. The upper bound is tested first, so
// if the bounds are passed in the wrong order `top` wins.
template <class T>
inline T CLAMP(T a, T bottom, T top) {
	return (a > top) ? top : ((a < bottom) ? bottom : a);
}
// Writes _fValue to emulated memory at _Addr in the format given by _quantizeType.
// QUANTIZE_FLOAT stores the float's 32-bit pattern unchanged. The integer
// formats multiply by m_quantizeTable[_uScale], clamp to the target type's
// range and then convert by truncating cast. An unrecognised type only trips
// the debug assert; nothing is written.
void CInterpreter::Helper_Quantize(const u32 _Addr, const float _fValue,
								   const EQuantizeType _quantizeType, const unsigned int _uScale)
{
	if (_quantizeType == QUANTIZE_FLOAT)
	{
		Memory::Write_U32(*(u32*)&_fValue,_Addr);
	}
	else if (_quantizeType == QUANTIZE_U8)
	{
		// used for THP player
		const float clamped = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f);
		Memory::Write_U8((u8)clamped, _Addr);
	}
	else if (_quantizeType == QUANTIZE_U16)
	{
		const float clamped = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f);
		Memory::Write_U16((u16)clamped, _Addr);
	}
	else if (_quantizeType == QUANTIZE_S8)
	{
		// Convert through the signed type first, then store its bit pattern.
		const float clamped = CLAMP(_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f);
		Memory::Write_U8((u8)(s8)clamped, _Addr);
	}
	else if (_quantizeType == QUANTIZE_S16)
	{
		const float clamped = CLAMP(_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f);
		Memory::Write_U16((u16)(s16)clamped, _Addr);
	}
	else
	{
		_dbg_assert_msg_(GEKKO,0,"PS dequantize","Unknown type to read");
	}
}
float CInterpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType,
const unsigned int _uScale)
{
// dequantize the value
float fResult;
switch(_quantizeType)
{
case QUANTIZE_FLOAT:
{
u32 dwValue = Memory::Read_U32(_Addr);
fResult = *(float*)&dwValue;
}
break;
case QUANTIZE_U8:
fResult = static_cast<float>(Memory::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
break;
case QUANTIZE_U16:
fResult = static_cast<float>(Memory::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
break;
case QUANTIZE_S8:
fResult = static_cast<float>((s8)Memory::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
break;
// used for THP player
case QUANTIZE_S16:
fResult = static_cast<float>((s16)Memory::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
break;
default:
_dbg_assert_msg_(GEKKO,0,"PS dequantize","Unknown type to read");
fResult = 0;
break;
}
return fResult;
}
// psq_l: quantized paired-single load.
// The GQR picked by _inst.I supplies the load type and scale. The address is
// GPR[RA] + SIMM_12, or just SIMM_12 when the RA field is 0. PS0 always
// receives the first dequantized element; PS1 receives the next element when
// W == 0 and the constant 1.0 otherwise.
void CInterpreter::psq_l(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
	const unsigned int ldScale = gqr.LD_SCALE;
	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;

	// Byte offset from the first element to the second one.
	int stride;
	switch (ldType)
	{
	case QUANTIZE_U8:
	case QUANTIZE_S8:
		stride = 0x1;
		break;
	case QUANTIZE_U16:
	case QUANTIZE_S16:
		stride = 0x2;
		break;
	default:
		stride = 4;
		break;
	}

	rPS0(_inst.RS) = Helper_Dequantize(EA, ldType, ldScale);
	if (_inst.W == 0)
		rPS1(_inst.RS) = Helper_Dequantize(EA+stride, ldType, ldScale);
	else
		rPS1(_inst.RS) = 1.0f;
}
// psq_lu: quantized paired-single load with update.
// The GQR picked by _inst.I supplies the load type and scale. The address is
// GPR[RA] + SIMM_12. With W == 0 two consecutive elements are dequantized into
// PS0/PS1; otherwise only PS0 is loaded and PS1 is set to 1.0. The address is
// written back to GPR[RA] after the load.
void CInterpreter::psq_lu(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
	const unsigned int ldScale = gqr.LD_SCALE;
	const u32 EA = m_GPR[_inst.RA] + _inst.SIMM_12;

	// Byte offset of the second element. Uses the QUANTIZE_* names, as psq_l
	// does, instead of raw type numbers.
	int c = 4;
	if ((ldType == QUANTIZE_U8) || (ldType == QUANTIZE_S8)) c = 0x1;
	if ((ldType == QUANTIZE_U16) || (ldType == QUANTIZE_S16)) c = 0x2;

	if (_inst.W == 0)
	{
		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
		rPS1(_inst.RS) = Helper_Dequantize( EA+c, ldType, ldScale );
	}
	else
	{
		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
		rPS1(_inst.RS) = 1.0f;
	}

	// Update form: RA receives the effective address.
	m_GPR[_inst.RA] = EA;
}
// psq_st: quantized paired-single store.
// The GQR picked by _inst.I supplies the store type and scale. The address is
// GPR[RA] + SIMM_12, or just SIMM_12 when the RA field is 0. With W == 0 both
// PS0 and PS1 are quantized and stored back to back; otherwise only PS0 is.
void CInterpreter::psq_st(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
	const unsigned int stScale = gqr.ST_SCALE;
	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;

	// Byte offset of the second element. Uses the QUANTIZE_* names, as psq_l
	// does, instead of raw type numbers.
	int c = 4;
	if ((stType == QUANTIZE_U8) || (stType == QUANTIZE_S8)) c = 0x1;
	if ((stType == QUANTIZE_U16) || (stType == QUANTIZE_S16)) c = 0x2;

	if (_inst.W == 0)
	{
		Helper_Quantize( EA, (float)rPS0(_inst.RS), stType, stScale );
		Helper_Quantize( EA+c, (float)rPS1(_inst.RS), stType, stScale );
	}
	else
	{
		Helper_Quantize( EA, (float)rPS0(_inst.RS), stType, stScale );
	}
}
// psq_stu: quantized paired-single store with update.
// The GQR picked by _inst.I supplies the store type and scale. The address is
// GPR[RA] + SIMM_12. With W == 0 both PS0 and PS1 are quantized and stored
// back to back; otherwise only PS0 is. The address is written back to GPR[RA]
// after the store.
void CInterpreter::psq_stu(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
	const unsigned int stScale = gqr.ST_SCALE;
	const u32 EA = m_GPR[_inst.RA] + _inst.SIMM_12;

	// Byte offset of the second element. Uses the QUANTIZE_* names, as psq_l
	// does, instead of raw type numbers.
	int c = 4;
	if ((stType == QUANTIZE_U8) || (stType == QUANTIZE_S8)) c = 0x1;
	if ((stType == QUANTIZE_U16) || (stType == QUANTIZE_S16)) c = 0x2;

	if (_inst.W == 0)
	{
		Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
		Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale);
	}
	else
	{
		Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
	}

	// Update form: RA receives the effective address.
	m_GPR[_inst.RA] = EA;
}
// psq_lx: quantized paired-single load, indexed.
// The GQR picked by _inst.Ix supplies the load type and scale. The address is
// GPR[RA] + GPR[RB], or just GPR[RB] when the RA field is 0. With Wx == 0 two
// consecutive elements are dequantized into PS0/PS1; otherwise only PS0 is
// loaded and PS1 is set to 1.0.
void CInterpreter::psq_lx(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
	const unsigned int ldScale = gqr.LD_SCALE;
	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + m_GPR[_inst.RB]) : m_GPR[_inst.RB];

	// Byte offset of the second element. Uses the QUANTIZE_* names, as psq_l
	// does, instead of raw type numbers.
	int c = 4;
	if ((ldType == QUANTIZE_U8) || (ldType == QUANTIZE_S8)) c = 0x1;
	if ((ldType == QUANTIZE_U16) || (ldType == QUANTIZE_S16)) c = 0x2;

	if (_inst.Wx == 0)
	{
		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
		rPS1(_inst.RS) = Helper_Dequantize( EA+c, ldType, ldScale );
	}
	else
	{
		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
		rPS1(_inst.RS) = 1.0f;
	}
}
// psq_stx: quantized paired-single store, indexed.
// The GQR picked by _inst.Ix supplies the store type and scale. The address is
// GPR[RA] + GPR[RB], or just GPR[RB] when the RA field is 0. With Wx == 0 both
// PS0 and PS1 are quantized and stored back to back; otherwise only PS0 is.
void CInterpreter::psq_stx(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
	const unsigned int stScale = gqr.ST_SCALE;
	const u32 EA = _inst.RA ? (m_GPR[_inst.RA] + m_GPR[_inst.RB]) : m_GPR[_inst.RB];

	// Byte offset of the second element. Uses the QUANTIZE_* names, as psq_l
	// does, instead of raw type numbers.
	int c = 4;
	if ((stType == QUANTIZE_U8) || (stType == QUANTIZE_S8)) c = 0x1;
	if ((stType == QUANTIZE_U16) || (stType == QUANTIZE_S16)) c = 0x2;

	if (_inst.Wx == 0)
	{
		Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
		Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale);
	}
	else
	{
		Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
	}
}
// psq_lux: quantized paired-single load, indexed with update.
// The GQR picked by _inst.Ix supplies the load type and scale. The address is
// GPR[RA] + GPR[RB]. With Wx == 0 two consecutive elements are dequantized
// into PS0/PS1; otherwise only PS0 is loaded and PS1 is set to 1.0. The
// address is written back to GPR[RA] after the load.
void CInterpreter::psq_lux(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
	const unsigned int ldScale = gqr.LD_SCALE;
	const u32 EA = m_GPR[_inst.RA] + m_GPR[_inst.RB];

	// Byte offset of the second element. Uses the QUANTIZE_* names, as psq_l
	// does, instead of raw type numbers.
	int c = 4;
	if ((ldType == QUANTIZE_U8) || (ldType == QUANTIZE_S8)) c = 0x1;
	if ((ldType == QUANTIZE_U16) || (ldType == QUANTIZE_S16)) c = 0x2;

	if (_inst.Wx == 0)
	{
		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
		rPS1(_inst.RS) = Helper_Dequantize( EA+c, ldType, ldScale );
	}
	else
	{
		rPS0(_inst.RS) = Helper_Dequantize( EA, ldType, ldScale );
		rPS1(_inst.RS) = 1.0f;
	}

	// Update form: RA receives the effective address.
	m_GPR[_inst.RA] = EA;
}
// psq_stux: quantized paired-single store, indexed with update.
// The GQR picked by _inst.Ix supplies the store type and scale. The address is
// GPR[RA] + GPR[RB]. With Wx == 0 both PS0 and PS1 are quantized and stored
// back to back; otherwise only PS0 is. The address is written back to GPR[RA]
// after the store.
void CInterpreter::psq_stux(UGeckoInstruction _inst)
{
	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
	const unsigned int stScale = gqr.ST_SCALE;
	const u32 EA = m_GPR[_inst.RA] + m_GPR[_inst.RB];

	// Byte offset of the second element. Uses the QUANTIZE_* names, as psq_l
	// does, instead of raw type numbers.
	int c = 4;
	if ((stType == QUANTIZE_U8) || (stType == QUANTIZE_S8)) c = 0x1;
	if ((stType == QUANTIZE_U16) || (stType == QUANTIZE_S16)) c = 0x2;

	if (_inst.Wx == 0)
	{
		Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
		Helper_Quantize(EA+c, (float)rPS1(_inst.RS), stType, stScale);
	}
	else
	{
		Helper_Quantize(EA, (float)rPS0(_inst.RS), stType, stScale);
	}

	// Update form: RA receives the effective address.
	m_GPR[_inst.RA] = EA;
}

View File

@ -19,482 +19,11 @@
#include "Interpreter.h"
#include "../../HW/Memmap.h"
// Dequantize table: 64 multipliers indexed by the scale value handed to Helper_Dequantize.
// Entries 0..31 hold 2^-i; entries 32..63 hold 2^(64-i) (2^32 down to 2^1).
// NOTE(review): the upper half presumably serves negative two's-complement scales - confirm
// against the GQR scale field definition.
// The 2^31 terms use an unsigned shift: (1 << 31) on a signed int produces INT_MIN, which
// would make entries 31 and 33 negative.
const float m_dequantizeTable[] =
{
	1.0 / (1 << 0),   1.0 / (1 << 1),   1.0 / (1 << 2),   1.0 / (1 << 3),
	1.0 / (1 << 4),   1.0 / (1 << 5),   1.0 / (1 << 6),   1.0 / (1 << 7),
	1.0 / (1 << 8),   1.0 / (1 << 9),   1.0 / (1 << 10),  1.0 / (1 << 11),
	1.0 / (1 << 12),  1.0 / (1 << 13),  1.0 / (1 << 14),  1.0 / (1 << 15),
	1.0 / (1 << 16),  1.0 / (1 << 17),  1.0 / (1 << 18),  1.0 / (1 << 19),
	1.0 / (1 << 20),  1.0 / (1 << 21),  1.0 / (1 << 22),  1.0 / (1 << 23),
	1.0 / (1 << 24),  1.0 / (1 << 25),  1.0 / (1 << 26),  1.0 / (1 << 27),
	1.0 / (1 << 28),  1.0 / (1 << 29),  1.0 / (1 << 30),  1.0 / (1U << 31),
	(1ULL << 32),     (1U << 31),       (1 << 30),        (1 << 29),
	(1 << 28),        (1 << 27),        (1 << 26),        (1 << 25),
	(1 << 24),        (1 << 23),        (1 << 22),        (1 << 21),
	(1 << 20),        (1 << 19),        (1 << 18),        (1 << 17),
	(1 << 16),        (1 << 15),        (1 << 14),        (1 << 13),
	(1 << 12),        (1 << 11),        (1 << 10),        (1 << 9),
	(1 << 8),         (1 << 7),         (1 << 6),         (1 << 5),
	(1 << 4),         (1 << 3),         (1 << 2),         (1 << 1),
};
// Quantize table: 64 multipliers indexed by the scale value handed to Helper_Quantize.
// Entries 0..31 hold 2^i; entries 32..63 hold 2^-(64-i) (2^-32 up to 2^-1).
// NOTE(review): the upper half presumably serves negative two's-complement scales - confirm
// against the GQR scale field definition.
// The 2^31 terms use an unsigned shift: (1 << 31) on a signed int produces INT_MIN, which
// would make entries 31 and 33 negative.
const float m_quantizeTable[] =
{
	(1 << 0),          (1 << 1),          (1 << 2),          (1 << 3),
	(1 << 4),          (1 << 5),          (1 << 6),          (1 << 7),
	(1 << 8),          (1 << 9),          (1 << 10),         (1 << 11),
	(1 << 12),         (1 << 13),         (1 << 14),         (1 << 15),
	(1 << 16),         (1 << 17),         (1 << 18),         (1 << 19),
	(1 << 20),         (1 << 21),         (1 << 22),         (1 << 23),
	(1 << 24),         (1 << 25),         (1 << 26),         (1 << 27),
	(1 << 28),         (1 << 29),         (1 << 30),         (1U << 31),
	1.0 / (1ULL << 32), 1.0 / (1U << 31), 1.0 / (1 << 30),   1.0 / (1 << 29),
	1.0 / (1 << 28),   1.0 / (1 << 27),   1.0 / (1 << 26),   1.0 / (1 << 25),
	1.0 / (1 << 24),   1.0 / (1 << 23),   1.0 / (1 << 22),   1.0 / (1 << 21),
	1.0 / (1 << 20),   1.0 / (1 << 19),   1.0 / (1 << 18),   1.0 / (1 << 17),
	1.0 / (1 << 16),   1.0 / (1 << 15),   1.0 / (1 << 14),   1.0 / (1 << 13),
	1.0 / (1 << 12),   1.0 / (1 << 11),   1.0 / (1 << 10),   1.0 / (1 << 9),
	1.0 / (1 << 8),    1.0 / (1 << 7),    1.0 / (1 << 6),    1.0 / (1 << 5),
	1.0 / (1 << 4),    1.0 / (1 << 3),    1.0 / (1 << 2),    1.0 / (1 << 1),
};
// Limits a to the range [bottom, top]. The upper bound is tested first, so if the
// bounds are inverted and a exceeds top, top is returned.
template <class T>
inline T CLAMP(T a, T bottom, T top) {
	return (a > top) ? top : ((a < bottom) ? bottom : a);
}
// Converts _fValue to the requested storage format and writes it to emulated memory at _Addr.
//   QUANTIZE_FLOAT  - the 32 raw bits of the float are stored unchanged.
//   integer formats - the value is multiplied by m_quantizeTable[_uScale], clamped to the
//                     range of the target type, truncated by the integer cast and stored.
// Any other type triggers a debug assertion and stores nothing.
void CInterpreter::Helper_Quantize(const u32 _Addr, const float _fValue,
	const EQuantizeType _quantizeType, const unsigned int _uScale)
{
	switch (_quantizeType)
	{
	case QUANTIZE_FLOAT:
		Memory::Write_U32(*(u32*)&_fValue, _Addr);
		break;

	// used for THP player
	case QUANTIZE_U8:
		{
			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 255.0f);
			Memory::Write_U8((u8)fResult, _Addr);
		}
		break;

	case QUANTIZE_U16:
		{
			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], 0.0f, 65535.0f);
			Memory::Write_U16((u16)fResult, _Addr);
		}
		break;

	case QUANTIZE_S8:
		{
			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -128.0f, 127.0f);
			// Go through the signed type first so negative values keep their two's-complement bits.
			Memory::Write_U8((u8)(s8)fResult, _Addr);
		}
		break;

	case QUANTIZE_S16:
		{
			float fResult = CLAMP(_fValue * m_quantizeTable[_uScale], -32768.0f, 32767.0f);
			Memory::Write_U16((u16)(s16)fResult, _Addr);
		}
		break;

	default:
		// This is the store path, so report it as such (the text used to be a copy of the
		// message in Helper_Dequantize).
		_dbg_assert_msg_(GEKKO,0,"PS quantize","Unknown type to write");
		break;
	}
}
float CInterpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType,
const unsigned int _uScale)
{
// dequantize the value
float fResult;
switch(_quantizeType)
{
case QUANTIZE_FLOAT:
{
u32 dwValue = Memory::Read_U32(_Addr);
fResult = *(float*)&dwValue;
}
break;
case QUANTIZE_U8:
fResult = static_cast<float>(Memory::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
break;
case QUANTIZE_U16:
fResult = static_cast<float>(Memory::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
break;
case QUANTIZE_S8:
fResult = static_cast<float>((s8)Memory::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
break;
// used for THP player
case QUANTIZE_S16:
fResult = static_cast<float>((s16)Memory::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
break;
default:
_dbg_assert_msg_(GEKKO,0,"PS dequantize","Unknown type to read");
fResult = 0;
break;
}
return fResult;
}
// psq_l: quantized load with a 12-bit signed displacement.
// Load type and scale come from the GQR selected by the I field. When W is 0 both PS0
// and PS1 are loaded (PS1 one element further on); otherwise PS1 is set to 1.0f.
void CInterpreter::psq_l(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType loadType = static_cast<EQuantizeType>(quantReg.LD_TYPE);
	const unsigned int loadScale = quantReg.LD_SCALE;

	// A zero RA field means "no base register": the address is the displacement alone.
	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
	const u32 address = base + _inst.SIMM_12;

	// Byte distance between the two loaded elements.
	int stride;
	switch (loadType)
	{
	case QUANTIZE_U8:
	case QUANTIZE_S8:
		stride = 1;
		break;
	case QUANTIZE_U16:
	case QUANTIZE_S16:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	rPS0(_inst.RS) = Helper_Dequantize(address, loadType, loadScale);
	if (_inst.W == 0)
		rPS1(_inst.RS) = Helper_Dequantize(address + stride, loadType, loadScale);
	else
		rPS1(_inst.RS) = 1.0f;
}
// psq_lu: quantized load with displacement and update.
// Loads PS0 (and PS1 when W is 0, otherwise PS1 becomes 1.0f) using the load type and
// scale of the GQR selected by I, then writes the effective address back to GPR[RA].
void CInterpreter::psq_lu(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType loadType = static_cast<EQuantizeType>(quantReg.LD_TYPE);
	const unsigned int loadScale = quantReg.LD_SCALE;
	const u32 address = m_GPR[_inst.RA] + _inst.SIMM_12;

	// Byte distance between the two loaded elements.
	int stride;
	switch (static_cast<int>(loadType))
	{
	case 4:
	case 6:
		stride = 1;
		break;
	case 5:
	case 7:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	rPS0(_inst.RS) = Helper_Dequantize(address, loadType, loadScale);
	if (_inst.W == 0)
		rPS1(_inst.RS) = Helper_Dequantize(address + stride, loadType, loadScale);
	else
		rPS1(_inst.RS) = 1.0f;

	// Update form: the base register receives the effective address.
	m_GPR[_inst.RA] = address;
}
// psq_st: quantized store with a 12-bit signed displacement.
// Store type and scale come from the GQR selected by the I field. When W is 0 both PS0
// and PS1 are written (PS1 one element further on); otherwise only PS0.
void CInterpreter::psq_st(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType storeType = static_cast<EQuantizeType>(quantReg.ST_TYPE);
	const unsigned int storeScale = quantReg.ST_SCALE;

	// A zero RA field means "no base register": the address is the displacement alone.
	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
	const u32 address = base + _inst.SIMM_12;

	// Byte distance between the two stored elements.
	int stride;
	switch (static_cast<int>(storeType))
	{
	case 4:
	case 6:
		stride = 1;
		break;
	case 5:
	case 7:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	Helper_Quantize(address, static_cast<float>(rPS0(_inst.RS)), storeType, storeScale);
	if (_inst.W == 0)
		Helper_Quantize(address + stride, static_cast<float>(rPS1(_inst.RS)), storeType, storeScale);
}
// psq_stu: quantized store with displacement and update.
// Stores PS0 (and PS1 when W is 0) using the store type and scale of the GQR selected
// by I, then writes the effective address back to GPR[RA].
void CInterpreter::psq_stu(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.I));
	const EQuantizeType storeType = static_cast<EQuantizeType>(quantReg.ST_TYPE);
	const unsigned int storeScale = quantReg.ST_SCALE;
	const u32 address = m_GPR[_inst.RA] + _inst.SIMM_12;

	// Byte distance between the two stored elements.
	int stride;
	switch (static_cast<int>(storeType))
	{
	case 4:
	case 6:
		stride = 1;
		break;
	case 5:
	case 7:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	Helper_Quantize(address, static_cast<float>(rPS0(_inst.RS)), storeType, storeScale);
	if (_inst.W == 0)
		Helper_Quantize(address + stride, static_cast<float>(rPS1(_inst.RS)), storeType, storeScale);

	// Update form: the base register receives the effective address.
	m_GPR[_inst.RA] = address;
}
// psq_lx: indexed quantized load.
// Load type and scale come from the GQR selected by the Ix field. When Wx is 0 both PS0
// and PS1 are loaded (PS1 one element further on); otherwise PS1 is set to 1.0f.
void CInterpreter::psq_lx(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType loadType = static_cast<EQuantizeType>(quantReg.LD_TYPE);
	const unsigned int loadScale = quantReg.LD_SCALE;

	// A zero RA field means "no base register": the address is GPR[RB] alone.
	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
	const u32 address = base + m_GPR[_inst.RB];

	// Byte distance between the two loaded elements.
	int stride;
	switch (static_cast<int>(loadType))
	{
	case 4:
	case 6:
		stride = 1;
		break;
	case 5:
	case 7:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	rPS0(_inst.RS) = Helper_Dequantize(address, loadType, loadScale);
	if (_inst.Wx == 0)
		rPS1(_inst.RS) = Helper_Dequantize(address + stride, loadType, loadScale);
	else
		rPS1(_inst.RS) = 1.0f;
}
// psq_stx: indexed quantized store of a paired-single register.
// The store type and scale come from the GQR selected by the Ix field. When Wx is 0
// both PS0 and PS1 are written (PS1 one element further on); otherwise only PS0.
void CInterpreter::psq_stx(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType storeType = static_cast<EQuantizeType>(quantReg.ST_TYPE);
	const unsigned int storeScale = quantReg.ST_SCALE;

	// A zero RA field means "no base register": the address is GPR[RB] alone.
	const u32 base = _inst.RA ? m_GPR[_inst.RA] : 0;
	const u32 address = base + m_GPR[_inst.RB];

	// Byte distance between the two stored elements.
	int stride;
	switch (static_cast<int>(storeType))
	{
	case 4:
	case 6:
		stride = 1;
		break;
	case 5:
	case 7:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	Helper_Quantize(address, static_cast<float>(rPS0(_inst.RS)), storeType, storeScale);
	if (_inst.Wx == 0)
		Helper_Quantize(address + stride, static_cast<float>(rPS1(_inst.RS)), storeType, storeScale);
}
// psq_lux: indexed quantized load with update.
// Loads PS0 (and PS1 when Wx is 0, otherwise PS1 becomes 1.0f) using the load type and
// scale of the GQR selected by Ix, then writes the effective address back to GPR[RA].
void CInterpreter::psq_lux(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType loadType = static_cast<EQuantizeType>(quantReg.LD_TYPE);
	const unsigned int loadScale = quantReg.LD_SCALE;
	const u32 address = m_GPR[_inst.RA] + m_GPR[_inst.RB];

	// Byte distance between the two loaded elements.
	int stride;
	switch (static_cast<int>(loadType))
	{
	case 4:
	case 6:
		stride = 1;
		break;
	case 5:
	case 7:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	rPS0(_inst.RS) = Helper_Dequantize(address, loadType, loadScale);
	if (_inst.Wx == 0)
		rPS1(_inst.RS) = Helper_Dequantize(address + stride, loadType, loadScale);
	else
		rPS1(_inst.RS) = 1.0f;

	// Update form: the base register receives the effective address.
	m_GPR[_inst.RA] = address;
}
// psq_stux: indexed quantized store with update.
// Stores PS0 (and PS1 when Wx is 0) using the store type and scale of the GQR selected
// by Ix, then writes the effective address back to GPR[RA].
void CInterpreter::psq_stux(UGeckoInstruction _inst)
{
	const UGQR quantReg(rSPR(SPR_GQR0 + _inst.Ix));
	const EQuantizeType storeType = static_cast<EQuantizeType>(quantReg.ST_TYPE);
	const unsigned int storeScale = quantReg.ST_SCALE;
	const u32 address = m_GPR[_inst.RA] + m_GPR[_inst.RB];

	// Byte distance between the two stored elements.
	int stride;
	switch (static_cast<int>(storeType))
	{
	case 4:
	case 6:
		stride = 1;
		break;
	case 5:
	case 7:
		stride = 2;
		break;
	default:
		stride = 4;
		break;
	}

	Helper_Quantize(address, static_cast<float>(rPS0(_inst.RS)), storeType, storeScale);
	if (_inst.Wx == 0)
		Helper_Quantize(address + stride, static_cast<float>(rPS1(_inst.RS)), storeType, storeScale);

	// Update form: the base register receives the effective address.
	m_GPR[_inst.RA] = address;
}
// ps_div: lane-wise division, FD = FA / FB for both PS0 and PS1.
void CInterpreter::ps_div(UGeckoInstruction _inst)
{
	const double quotient0 = rPS0(_inst.FA) / rPS0(_inst.FB);
	const double quotient1 = rPS1(_inst.FA) / rPS1(_inst.FB);
	rPS0(_inst.FD) = quotient0;
	rPS1(_inst.FD) = quotient1;
}
// ps_sub: lane-wise subtraction, FD = FA - FB for both PS0 and PS1.
void CInterpreter::ps_sub(UGeckoInstruction _inst)
{
	const double difference0 = rPS0(_inst.FA) - rPS0(_inst.FB);
	const double difference1 = rPS1(_inst.FA) - rPS1(_inst.FB);
	rPS0(_inst.FD) = difference0;
	rPS1(_inst.FD) = difference1;
}
// ps_add: lane-wise addition, FD = FA + FB for both PS0 and PS1.
void CInterpreter::ps_add(UGeckoInstruction _inst)
{
	const double sum0 = rPS0(_inst.FA) + rPS0(_inst.FB);
	const double sum1 = rPS1(_inst.FA) + rPS1(_inst.FB);
	rPS0(_inst.FD) = sum0;
	rPS1(_inst.FD) = sum1;
}
// These "binary instructions" do not alter FPSCR.
// ps_sel: lane-wise select. Each lane of FD takes FC when the same lane of FA is >= 0,
// otherwise FB.
void CInterpreter::ps_sel(UGeckoInstruction _inst)
{
	if (rPS0(_inst.FA) >= 0.0f)
		rPS0(_inst.FD) = rPS0(_inst.FC);
	else
		rPS0(_inst.FD) = rPS0(_inst.FB);

	if (rPS1(_inst.FA) >= 0.0f)
		rPS1(_inst.FD) = rPS1(_inst.FC);
	else
		rPS1(_inst.FD) = rPS1(_inst.FB);
}
// ps_res: lane-wise reciprocal, FD = 1 / FB for both PS0 and PS1.
void CInterpreter::ps_res(UGeckoInstruction _inst)
{
	const double reciprocal0 = 1.0f / rPS0(_inst.FB);
	const double reciprocal1 = 1.0f / rPS1(_inst.FB);
	rPS0(_inst.FD) = reciprocal0;
	rPS1(_inst.FD) = reciprocal1;
}
// ps_mul: lane-wise multiplication, FD = FA * FC for both PS0 and PS1.
void CInterpreter::ps_mul(UGeckoInstruction _inst)
{
	const double product0 = rPS0(_inst.FA) * rPS0(_inst.FC);
	const double product1 = rPS1(_inst.FA) * rPS1(_inst.FC);
	rPS0(_inst.FD) = product0;
	rPS1(_inst.FD) = product1;
}
// ps_rsqrte: lane-wise reciprocal square root, FD = 1 / sqrt(FB), computed in double precision.
void CInterpreter::ps_rsqrte(UGeckoInstruction _inst)
{
	const double result0 = 1.0 / sqrt(rPS0(_inst.FB));
	const double result1 = 1.0 / sqrt(rPS1(_inst.FB));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_msub: lane-wise multiply-subtract, FD = (FA * FC) - FB.
void CInterpreter::ps_msub(UGeckoInstruction _inst)
{
	const double result0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
	const double result1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB);
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_madd: lane-wise multiply-add, FD = (FA * FC) + FB.
void CInterpreter::ps_madd(UGeckoInstruction _inst)
{
	const double result0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
	const double result1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_nmsub: lane-wise negated multiply-subtract, FD = -((FA * FC) - FB).
void CInterpreter::ps_nmsub(UGeckoInstruction _inst)
{
	const double result0 = -(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB));
	const double result1 = -(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_nmadd: lane-wise negated multiply-add, FD = -((FA * FC) + FB).
// The two trailing select-style assignments that used to follow overwrote this result
// with FC or FB, so the function never returned a multiply-add; they have been removed.
void CInterpreter::ps_nmadd(UGeckoInstruction _inst)
{
	rPS0(_inst.FD) = -(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB));
	rPS1(_inst.FD) = -(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB));
}
void CInterpreter::ps_neg(UGeckoInstruction _inst)
@ -521,89 +50,7 @@ void CInterpreter::ps_abs(UGeckoInstruction _inst)
riPS1(_inst.FD) = riPS1(_inst.FB) &~ (1ULL << 63);
}
// ps_sum0: FD.PS0 = FA.PS0 + FB.PS1, FD.PS1 = FC.PS1.
// Both results are computed before either lane of FD is written, because FD may be the
// same register as one of the sources.
void CInterpreter::ps_sum0(UGeckoInstruction _inst)
{
	const double crossSum = rPS0(_inst.FA) + rPS1(_inst.FB);
	const double passThrough = rPS1(_inst.FC);
	rPS0(_inst.FD) = crossSum;
	rPS1(_inst.FD) = passThrough;
}
// ps_sum1: FD.PS0 = FC.PS0, FD.PS1 = FA.PS0 + FB.PS1.
// Both results are computed before either lane of FD is written, because FD may be the
// same register as one of the sources.
void CInterpreter::ps_sum1(UGeckoInstruction _inst)
{
	const double passThrough = rPS0(_inst.FC);
	const double crossSum = rPS0(_inst.FA) + rPS1(_inst.FB);
	rPS0(_inst.FD) = passThrough;
	rPS1(_inst.FD) = crossSum;
}
// ps_muls0: multiplies both lanes of FA by FC.PS0.
// Results are buffered first so that FD may alias FA or FC.
void CInterpreter::ps_muls0(UGeckoInstruction _inst)
{
	const double scaled0 = rPS0(_inst.FA) * rPS0(_inst.FC);
	const double scaled1 = rPS1(_inst.FA) * rPS0(_inst.FC);
	rPS0(_inst.FD) = scaled0;
	rPS1(_inst.FD) = scaled1;
}
// ps_muls1: multiplies both lanes of FA by FC.PS1.
// Results are buffered first so that FD may alias FA or FC.
void CInterpreter::ps_muls1(UGeckoInstruction _inst)
{
	const double scaled0 = rPS0(_inst.FA) * rPS1(_inst.FC);
	const double scaled1 = rPS1(_inst.FA) * rPS1(_inst.FC);
	rPS0(_inst.FD) = scaled0;
	rPS1(_inst.FD) = scaled1;
}
// ps_madds0: FD = (FA * FC.PS0) + FB for both lanes.
// Results are buffered first so that FD may alias a source register.
void CInterpreter::ps_madds0(UGeckoInstruction _inst)
{
	const double result0 = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
	const double result1 = (rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB);
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_madds1: FD = (FA * FC.PS1) + FB for both lanes.
// Results are buffered first so that FD may alias a source register.
void CInterpreter::ps_madds1(UGeckoInstruction _inst)
{
	const double result0 = (rPS0(_inst.FA) * rPS1(_inst.FC)) + rPS0(_inst.FB);
	const double result1 = (rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB);
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_cmpu0: compares FA.PS0 with FB.PS0 and writes the outcome to CR field CRFD:
// 8 = less than, 4 = greater than, 2 = equal, 1 = unordered (at least one operand is NaN).
void CInterpreter::ps_cmpu0(UGeckoInstruction _inst)
{
	double fa = rPS0(_inst.FA);
	double fb = rPS0(_inst.FB);
	int compareResult;
	if (fa < fb) compareResult = 8;
	else if (fa > fb) compareResult = 4;
	else if (fa == fb) compareResult = 2;
	else compareResult = 1; // every comparison is false only when a NaN is involved
	SetCRField(_inst.CRFD, compareResult);
}
// ps_cmpo0: placeholder implementation that simply forwards to ps_cmpu0, so it
// currently behaves exactly like that instruction.
void CInterpreter::ps_cmpo0(UGeckoInstruction _inst)
{
// for now HACK
ps_cmpu0(_inst);
}
// ps_cmpu1: compares FA.PS1 with FB.PS1 and writes the outcome to CR field CRFD:
// 8 = less than, 4 = greater than, 2 = equal, 1 = unordered (at least one operand is NaN).
void CInterpreter::ps_cmpu1(UGeckoInstruction _inst)
{
	double fa = rPS1(_inst.FA);
	double fb = rPS1(_inst.FB);
	int compareResult;
	if (fa < fb) compareResult = 8;
	else if (fa > fb) compareResult = 4;
	else if (fa == fb) compareResult = 2;
	else compareResult = 1; // every comparison is false only when a NaN is involved
	SetCRField(_inst.CRFD, compareResult);
}
// ps_cmpo1: placeholder implementation that simply forwards to ps_cmpu1, so it
// currently behaves exactly like that instruction.
void CInterpreter::ps_cmpo1(UGeckoInstruction _inst)
{
// for now HACK
ps_cmpu1(_inst);
}
// These are just moves, double is OK.
void CInterpreter::ps_merge00(UGeckoInstruction _inst)
{
double p0 = rPS0(_inst.FA);
@ -636,6 +83,159 @@ void CInterpreter::ps_merge11(UGeckoInstruction _inst)
rPS1(_inst.FD) = p1;
}
// From here on, the real deal.
// ps_div: lane-wise division rounded to single precision, FD = (float)(FA / FB).
// Clears FPSCR.FI and sets FPSCR.ZX when the PS0 divisor is zero (only lane 0 is examined).
void CInterpreter::ps_div(UGeckoInstruction _inst)
{
	// Capture the PS0 divisor before FD is written: FD may be the same register as FB,
	// in which case a check made afterwards would test the quotient instead.
	const double divisor0 = rPS0(_inst.FB);
	rPS0(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / divisor0);
	rPS1(_inst.FD) = static_cast<float>(rPS1(_inst.FA) / rPS1(_inst.FB));
	FPSCR.FI = 0;
	if (fabs(divisor0) == 0.0) {
		FPSCR.ZX = 1;
	}
}
// ps_sub: lane-wise subtraction rounded to single precision, FD = (float)(FA - FB).
void CInterpreter::ps_sub(UGeckoInstruction _inst)
{
	const float difference0 = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
	const float difference1 = static_cast<float>(rPS1(_inst.FA) - rPS1(_inst.FB));
	rPS0(_inst.FD) = difference0;
	rPS1(_inst.FD) = difference1;
}
// ps_add: lane-wise addition rounded to single precision, FD = (float)(FA + FB).
void CInterpreter::ps_add(UGeckoInstruction _inst)
{
	const float sum0 = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
	const float sum1 = static_cast<float>(rPS1(_inst.FA) + rPS1(_inst.FB));
	rPS0(_inst.FD) = sum0;
	rPS1(_inst.FD) = sum1;
}
// ps_res: lane-wise reciprocal computed in single precision, FD = 1.0f / (float)FB.
void CInterpreter::ps_res(UGeckoInstruction _inst)
{
	const float reciprocal0 = 1.0f / static_cast<float>(rPS0(_inst.FB));
	const float reciprocal1 = 1.0f / static_cast<float>(rPS1(_inst.FB));
	rPS0(_inst.FD) = reciprocal0;
	rPS1(_inst.FD) = reciprocal1;
}
// ps_mul: lane-wise multiplication rounded to single precision, FD = (float)(FA * FC).
void CInterpreter::ps_mul(UGeckoInstruction _inst)
{
	const float product0 = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
	const float product1 = static_cast<float>(rPS1(_inst.FA) * rPS1(_inst.FC));
	rPS0(_inst.FD) = product0;
	rPS1(_inst.FD) = product1;
}
// ps_rsqrte: lane-wise reciprocal square root computed in single precision,
// FD = 1.0f / sqrtf((float)FB). Sets FPSCR.ZX when the PS0 operand is zero
// (only lane 0 is examined).
void CInterpreter::ps_rsqrte(UGeckoInstruction _inst)
{
	// Capture the PS0 operand before FD is written: FD may be the same register as FB,
	// in which case a check made afterwards would test the result instead.
	const double operand0 = rPS0(_inst.FB);
	rPS0(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)operand0));
	rPS1(_inst.FD) = static_cast<double>(1.0f / sqrtf((float)rPS1(_inst.FB)));
	if (fabs(operand0) == 0.0) {
		FPSCR.ZX = 1;
	}
}
// ps_msub: lane-wise multiply-subtract rounded to single precision, FD = (float)((FA * FC) - FB).
void CInterpreter::ps_msub(UGeckoInstruction _inst)
{
	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) - rPS1(_inst.FB));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_madd: lane-wise multiply-add rounded to single precision, FD = (float)((FA * FC) + FB).
void CInterpreter::ps_madd(UGeckoInstruction _inst)
{
	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_nmsub: lane-wise negated multiply-subtract rounded to single precision,
// FD = (float)(-((FA * FC) - FB)).
void CInterpreter::ps_nmsub(UGeckoInstruction _inst)
{
	const float result0 = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) - rPS0(_inst.FB)));
	const float result1 = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) - rPS1(_inst.FB)));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_nmadd: lane-wise negated multiply-add rounded to single precision,
// FD = (float)(-((FA * FC) + FB)).
void CInterpreter::ps_nmadd(UGeckoInstruction _inst)
{
	const float result0 = static_cast<float>(-(rPS0(_inst.FA) * rPS0(_inst.FC) + rPS0(_inst.FB)));
	const float result1 = static_cast<float>(-(rPS1(_inst.FA) * rPS1(_inst.FC) + rPS1(_inst.FB)));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_sum0: FD.PS0 = (float)(FA.PS0 + FB.PS1), FD.PS1 = (float)FC.PS1.
// Both results are computed before either lane of FD is written, because FD may be the
// same register as one of the sources.
void CInterpreter::ps_sum0(UGeckoInstruction _inst)
{
	const double crossSum = static_cast<float>(rPS0(_inst.FA) + rPS1(_inst.FB));
	const double passThrough = static_cast<float>(rPS1(_inst.FC));
	rPS0(_inst.FD) = crossSum;
	rPS1(_inst.FD) = passThrough;
}
// ps_sum1: FD.PS0 = (float)FC.PS0, FD.PS1 = (float)(FA.PS0 + FB.PS1).
// Both results are computed before either lane of FD is written, because FD may be the
// same register as one of the sources.
void CInterpreter::ps_sum1(UGeckoInstruction _inst)
{
	const float passThrough = static_cast<float>(rPS0(_inst.FC));
	const float crossSum = static_cast<float>(rPS0(_inst.FA) + rPS1(_inst.FB));
	rPS0(_inst.FD) = passThrough;
	rPS1(_inst.FD) = crossSum;
}
// ps_muls0: multiplies both lanes of FA by FC.PS0, rounding each product to single precision.
// Results are buffered first so that FD may alias FA or FC.
void CInterpreter::ps_muls0(UGeckoInstruction _inst)
{
	const float scaled0 = static_cast<float>(rPS0(_inst.FA) * rPS0(_inst.FC));
	const float scaled1 = static_cast<float>(rPS1(_inst.FA) * rPS0(_inst.FC));
	rPS0(_inst.FD) = scaled0;
	rPS1(_inst.FD) = scaled1;
}
// ps_muls1: multiplies both lanes of FA by FC.PS1, rounding each product to single precision.
// Results are buffered first so that FD may alias FA or FC.
void CInterpreter::ps_muls1(UGeckoInstruction _inst)
{
	const float scaled0 = static_cast<float>(rPS0(_inst.FA) * rPS1(_inst.FC));
	const float scaled1 = static_cast<float>(rPS1(_inst.FA) * rPS1(_inst.FC));
	rPS0(_inst.FD) = scaled0;
	rPS1(_inst.FD) = scaled1;
}
// ps_madds0: FD = (float)((FA * FC.PS0) + FB) for both lanes.
// Results are buffered first so that FD may alias a source register.
void CInterpreter::ps_madds0(UGeckoInstruction _inst)
{
	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS0(_inst.FC)) + rPS1(_inst.FB));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_madds1: FD = (float)((FA * FC.PS1) + FB) for both lanes.
// Results are buffered first so that FD may alias a source register.
void CInterpreter::ps_madds1(UGeckoInstruction _inst)
{
	const float result0 = static_cast<float>((rPS0(_inst.FA) * rPS1(_inst.FC)) + rPS0(_inst.FB));
	const float result1 = static_cast<float>((rPS1(_inst.FA) * rPS1(_inst.FC)) + rPS1(_inst.FB));
	rPS0(_inst.FD) = result0;
	rPS1(_inst.FD) = result1;
}
// ps_cmpu0: compares FA.PS0 with FB.PS0 (both narrowed to float) and writes the outcome
// to CR field CRFD: 8 = less than, 4 = greater than, 2 = equal,
// 1 = unordered (at least one operand is NaN).
void CInterpreter::ps_cmpu0(UGeckoInstruction _inst)
{
	float fa = rPS0(_inst.FA);
	float fb = rPS0(_inst.FB);
	int compareResult;
	if (fa < fb) compareResult = 8;
	else if (fa > fb) compareResult = 4;
	else if (fa == fb) compareResult = 2;
	else compareResult = 1; // every comparison is false only when a NaN is involved
	SetCRField(_inst.CRFD, compareResult);
}
// ps_cmpo0: placeholder implementation that simply forwards to ps_cmpu0, so it
// currently behaves exactly like that instruction.
void CInterpreter::ps_cmpo0(UGeckoInstruction _inst)
{
// for now HACK
ps_cmpu0(_inst);
}
// ps_cmpu1: compares FA.PS1 with FB.PS1 (both narrowed to float) and writes the outcome
// to CR field CRFD: 8 = less than, 4 = greater than, 2 = equal,
// 1 = unordered (at least one operand is NaN).
void CInterpreter::ps_cmpu1(UGeckoInstruction _inst)
{
	float fa = rPS1(_inst.FA);
	float fb = rPS1(_inst.FB);
	int compareResult;
	if (fa < fb) compareResult = 8;
	else if (fa > fb) compareResult = 4;
	else if (fa == fb) compareResult = 2;
	else compareResult = 1; // every comparison is false only when a NaN is involved
	SetCRField(_inst.CRFD, compareResult);
}
// ps_cmpo1: placeholder implementation that simply forwards to ps_cmpu1, so it
// currently behaves exactly like that instruction.
void CInterpreter::ps_cmpo1(UGeckoInstruction _inst)
{
// for now HACK
ps_cmpu1(_inst);
}
// __________________________________________________________________________________________________
// dcbz_l
// TODO(ector) check docs

View File

@ -34,12 +34,22 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
#include "../../Core.h"
#include "Interpreter.h"
/*
Most of these are together with fctiwx
mffsx: 800c3624
mffsx: 80043c98
mffsx: 8003dd48
mffsx: 8003dd9c
mffsx: 80036608
mffsx: 80036650 (huh?)
*/
// TODO(ector): More proper handling of SSE state.
// That is, set rounding mode etc when entering jit code or the interpreter loop
// Restore rounding mode when calling anything external
void UpdateSSEState(int round, bool daz)
void UpdateSSEState()
{
u32 csr = _mm_getcsr();
@ -51,14 +61,14 @@ void UpdateSSEState(int round, bool daz)
1,
};
csr = csr & 0x9FFF;
csr |= ssetable[round] << 13;
csr |= ssetable[FPSCR.RN] << 13;
// Also handle denormals as zero (FZ + DAZ)
csr &= ~0x8020;
// SETTING DAZ KILLS BEYOND GOOD AND EVIL
// if (daz)
// csr |= 0x8020;
// SETTING FTZ+DAZ KILLS BEYOND GOOD AND EVIL
//if (daz)
// csr |= 0x20; // Only set DAZ //0x8020;
_mm_setcsr(csr);
}
@ -72,7 +82,6 @@ void RestoreSSEState()
void UpdateFPSCR(UReg_FPSCR fp)
{
// Set FPU rounding mode to mimic the PowerPC's
int round = fp.RN;
#ifdef _M_IX86
// This shouldn't really be needed anymore since we use SSE
#ifdef _WIN32
@ -83,7 +92,7 @@ void UpdateFPSCR(UReg_FPSCR fp)
_RC_UP,
_RC_DOWN
};
_set_controlfp(_MCW_RC, table[round]);
_set_controlfp(_MCW_RC, table[fp.RN]);
#else
const unsigned short table[4] =
{
@ -94,19 +103,48 @@ void UpdateFPSCR(UReg_FPSCR fp)
};
unsigned short mode;
asm ("fstcw %0" : : "m" (mode));
mode = (mode & ~FPU_ROUND_MASK) | table[round];
mode = (mode & ~FPU_ROUND_MASK) | table[fp.RN];
asm ("fldcw %0" : : "m" (mode));
#endif
#endif
if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
{
PanicAlert("FPSCR - exceptions enabled. Please report.");
}
// Also corresponding SSE rounding mode setting
UpdateSSEState(round, fp.NI ? true : false);
UpdateSSEState();
}
// mcrfs: copies FPSCR field CRFS into CR field CRFD, then clears the exception bits
// that live in that FPSCR field.
void CInterpreter::mcrfs(UGeckoInstruction _inst)
{
	// FPSCR fields are numbered from the most significant nibble (field 0 = top four bits),
	// so field n sits 4 * (7 - n) bits above bit 0 of Hex.
	const u32 shift = 4 * (7 - _inst.CRFS);
	const u32 fpflags = (FPSCR.Hex >> shift) & 0xF;

	// Only the exception bits are cleared. Wiping the whole nibble would also destroy
	// non-exception state held in the same or other fields (e.g. the rounding mode).
	switch (_inst.CRFS) {
	case 0:
		FPSCR.FX = 0;
		FPSCR.OX = 0;
		break;
	case 1:
		FPSCR.UX = 0;
		FPSCR.ZX = 0;
		FPSCR.XX = 0;
		FPSCR.VXSNAN = 0;
		break;
	case 2:
		FPSCR.VXISI = 0;
		FPSCR.VXIDI = 0;
		FPSCR.VXZDZ = 0;
		FPSCR.VXIMZ = 0;
		break;
	case 3:
		FPSCR.VXVC = 0;
		break;
	case 5:
		FPSCR.VXSOFT = 0;
		FPSCR.VXSQRT = 0;
		FPSCR.VXCVI = 0;
		break;
	}
	SetCRField(_inst.CRFD, fpflags);
	UpdateFPSCR(FPSCR);
}
@ -127,8 +165,6 @@ void CInterpreter::mcrfs(UGeckoInstruction _inst)
#define MXCSR_ROUND (16384|8192)
#define MXCSR_FLUSH 32768
void CInterpreter::mffsx(UGeckoInstruction _inst)
{
// load from FPSCR
@ -136,31 +172,35 @@ void CInterpreter::mffsx(UGeckoInstruction _inst)
// TODO(ector): grab all overflow flags etc and set them in FPSCR
riPS0(_inst.FD) = (u64)FPSCR.Hex;
if (_inst.Rc) PanicAlert("mffsx: inst_.Rc");
}
// mtfsb0x: clears the single FPSCR bit selected by CRBD (counted from the most
// significant bit of Hex) and re-applies the FPSCR state. The record form (Rc) is not
// implemented and raises a panic alert.
void CInterpreter::mtfsb0x(UGeckoInstruction _inst)
{
	const u32 bitMask = 0x80000000 >> _inst.CRBD;
	FPSCR.Hex &= ~bitMask;
	UpdateFPSCR(FPSCR);
	if (_inst.Rc)
		PanicAlert("mtfsb0x: inst_.Rc");
}
// mtfsb1x: sets the single FPSCR bit selected by CRBD (counted from the most
// significant bit of Hex) and re-applies the FPSCR state. The record form (Rc) is not
// implemented and raises a panic alert.
void CInterpreter::mtfsb1x(UGeckoInstruction _inst)
{
	const u32 bitMask = 0x80000000 >> _inst.CRBD;
	FPSCR.Hex |= bitMask;
	UpdateFPSCR(FPSCR);
	if (_inst.Rc)
		PanicAlert("mtfsb1x: inst_.Rc");
}
// mtfsfix: replaces the 4-bit FPSCR field selected by CRFD with the immediate encoded
// in the instruction word, then re-applies the FPSCR state. The record form (Rc) is not
// implemented and raises a panic alert.
void CInterpreter::mtfsfix(UGeckoInstruction _inst)
{
	// Field 0 is the most significant nibble of FPSCR.
	const u32 shift = 4 * _inst.CRFD;
	const u32 mask = 0xF0000000 >> shift;
	// Move the immediate from its position in the opcode up to the top nibble.
	const u32 imm = (_inst.hex << 16) & 0xF0000000;
	FPSCR.Hex = (FPSCR.Hex & ~mask) | (imm >> shift);
	UpdateFPSCR(FPSCR);
	if (_inst.Rc) PanicAlert("mtfsfix: inst_.Rc");
}
void CInterpreter::mtfsfx(UGeckoInstruction _inst)
{
u32 fm = _inst.FM;
u32 fm = _inst.FM;
u32 m = 0;
for (int i = 0; i < 8; i++) { //7?? todo check
if (fm & (1 << i))
@ -169,6 +209,7 @@ void CInterpreter::mtfsfx(UGeckoInstruction _inst)
FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
UpdateFPSCR(FPSCR);
if (_inst.Rc) PanicAlert("mtfsfx: inst_.Rc");
}
void CInterpreter::mcrxr(UGeckoInstruction _inst)
@ -240,7 +281,7 @@ void CInterpreter::mtsrin(UGeckoInstruction _inst)
void CInterpreter::mftb(UGeckoInstruction _inst)
{
int iIndex = (_inst.TBR >> 5) | ((_inst.TBR&0x1F) << 5);
int iIndex = (_inst.TBR >> 5) | ((_inst.TBR & 0x1F) << 5);
if (iIndex == 268) m_GPR[_inst.RD] = TL;
else if (iIndex == 269) m_GPR[_inst.RD] = TU;
else _dbg_assert_(GEKKO,0);
@ -449,4 +490,3 @@ void CInterpreter::isync(UGeckoInstruction _inst)
{
//shouldnt do anything
}

View File

@ -231,6 +231,21 @@ namespace Jit64
JitState js;
JitOptions jo;
// Fills in the global JIT option block (jo) with its default settings.
void Init()
{
	jo.optimizeStack = true;

	// Speed boost, but not 100% safe
	jo.enableBlocklink = true;

	// Fast memory access is only available in 64-bit builds, where it follows the
	// startup parameter; elsewhere it is always off.
#ifdef _M_X64
	jo.enableFastMem = Core::GetStartupParameter().bUseFastMem;
#else
	jo.enableFastMem = false;
#endif

	jo.assumeFPLoadFromMem = true;
	jo.fpAccurateFlags = true;
	jo.optimizeGatherPipe = true;

	// When set, FPU instructions are sent to the interpreter instead of being compiled.
	jo.interpretFPU = false;
}
void WriteCallInterpreter(UGeckoInstruction _inst)
{
gpr.Flush(FLUSH_ALL);
@ -263,12 +278,6 @@ namespace Jit64
// Yup, just don't do anything.
}
// RESULTS (running kururin with optimizations on)
// at block 13968 they diverge.
// linux goes to 8010fe54
// windoze goes to 8010feb0
// after that they are completely out of sync.
// branches from the cmp result of r0, which comes from an lbz (loaded from stack)
static const bool ImHereDebug = false;
static const bool ImHereLog = false;
static std::map<u32, int> been_here;
@ -403,7 +412,12 @@ namespace Jit64
js.op = &ops[i];
js.instructionNumber = i;
if (i == (int)size - 1) js.isLastInstruction = true;
// const GekkoOpInfo *info = GetOpInfo();
// if (js.isLastInstruction)
if (jo.interpretFPU && PPCTables::UsesFPU(ops[i].inst))
Default(ops[i].inst);
else
PPCTables::CompileInstruction(ops[i].inst);
// else
// Default(ops[i].inst);

View File

@ -70,11 +70,14 @@ namespace Jit64
bool fpAccurateFlags;
bool enableFastMem;
bool optimizeGatherPipe;
bool interpretFPU;
};
extern JitState js;
extern JitOptions jo;
void Init();
void Default(UGeckoInstruction _inst);
void DoNothing(UGeckoInstruction _inst);

View File

@ -76,17 +76,6 @@ namespace Jit64
void InitCache()
{
jo.optimizeStack = true;
jo.enableBlocklink = true; // Speed boost, but not 100% safe
#ifdef _M_X64
jo.enableFastMem = Core::GetStartupParameter().bUseFastMem;
#else
jo.enableFastMem = false;
#endif
jo.assumeFPLoadFromMem = true;
jo.fpAccurateFlags = true;
jo.optimizeGatherPipe = true;
codeCache = (u8*)AllocateExecutableMemory(CODE_SIZE);
genFunctions = (u8*)AllocateExecutableMemory(GEN_SIZE);
trampolineCache = (u8*)AllocateExecutableMemory(TRAMPOLINE_SIZE);

View File

@ -17,6 +17,7 @@
#include "JitCore.h"
#include "JitCache.h"
#include "JitAsm.h"
#include "Jit.h"
#include "../../HW/Memmap.h"
#include "../../HW/CPU.h"
@ -31,6 +32,7 @@ namespace Jit64
{
// Initializes the JIT core: sets up the namespace-level JIT options via ::Jit64::Init(),
// initializes the code cache, and copies the "run compare client" startup flag into
// Asm::compareEnabled.
void Jit64Core::Init()
{
::Jit64::Init();
InitCache();
Asm::compareEnabled = Core::g_CoreStartupParameter.bRunCompareClient;
}

View File

@ -122,9 +122,9 @@ namespace Jit64
bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0;
bool ctrDecremented = false;
if ((inst.BO & 16) == 0) // Test CR with a combination of bits
if ((inst.BO & 16) == 0) // Test a CR bit
{
TEST(32, M(&CR), Imm32(0x80000000>>inst.BI));
TEST(32, M(&CR), Imm32(0x80000000 >> inst.BI));
if (inst.BO & 8) // Conditional branch
branch = CC_NZ;
else

View File

@ -150,7 +150,6 @@ namespace Jit64
fpr.UnlockAll();
}
void fmrx(UGeckoInstruction inst)
{
INSTRUCTION_START;

View File

@ -25,6 +25,7 @@
#include "Jit.h"
#include "JitCache.h"
#include "JitRegCache.h"
#include "Jit_Util.h"
// TODO
// ps_madds0
@ -198,7 +199,7 @@ namespace Jit64
op(XMM0, Gen::R(XMM1));
MOVAPD(fpr.RX(d), Gen::R(XMM0));
}
//fpr.SetDirty(fpr.RX(d));
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
}
@ -308,6 +309,7 @@ namespace Jit64
}
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), Gen::R(XMM0));
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
}

View File

@ -19,6 +19,7 @@
#include "Common.h"
#include "PPCTables.h"
#include "StringUtil.h"
#include "Interpreter/Interpreter.h"
#if defined(_M_IX86) || defined(_M_X64)
@ -409,7 +410,7 @@ GekkoOPTemplate table59[] =
{18, CInterpreter::fdivsx, Jit64::fp_arith_s, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}},
{20, CInterpreter::fsubsx, Jit64::fp_arith_s, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
{21, CInterpreter::faddsx, Jit64::fp_arith_s, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
{22, CInterpreter::fsqrtsx, Jit64::Default, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
// {22, CInterpreter::fsqrtsx, Jit64::Default, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, // Not implemented on gekko
{24, CInterpreter::fresx, Jit64::Default, {"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, CInterpreter::fmulsx, Jit64::fp_arith_s, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
{28, CInterpreter::fmsubsx, Jit64::fmaddXX, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
@ -637,11 +638,26 @@ void PPCTables::InitTables()
m_allInstructions[m_numInstructions++] = &table63[i].opinfo;
for (int i = 0; i < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); i++)
m_allInstructions[m_numInstructions++] = &table63_2[i].opinfo;
if (m_numInstructions >= 2048) {
PanicAlert("m_allInstructions underdimensioned");
}
}
namespace {
// Compiler PC recorded each time CompileInstruction sees an "mffsx" instruction;
// written out by LogCompiledInstructions.
std::vector<u32> rsplocations;
}
// Dispatches _inst to the JIT compile handler for its primary opcode, then updates the
// per-instruction statistics (compile count and last compile address). Every "mffsx"
// compiled also has its address appended to rsplocations.
void PPCTables::CompileInstruction(UGeckoInstruction _inst)
{
	dynaOpTable[_inst.OPCD](_inst);

	GekkoOPInfo *opInfo = GetOpInfo(_inst);
	if (!opInfo)
		return;

	if (strcmp(opInfo->opname, "mffsx") == 0)
		rsplocations.push_back(Jit64::js.compilerPC);

	opInfo->compileCount++;
	opInfo->lastUse = Jit64::js.compilerPC;
}
bool PPCTables::IsValidInstruction(UGeckoInstruction _instCode)
@ -685,3 +701,30 @@ void PPCTables::PrintInstructionRunCounts()
LOG(GEKKO, "%s : %i", temp[i].name,temp[i].count);
}
}
// Dumps JIT instruction coverage to text files in the current directory:
//   inst_log<N>.txt - instructions compiled at least once (name, compile count, run count, last PC)
//   inst_not<N>.txt - instructions never compiled (name, compile count, run count)
//   rsp_at.txt      - addresses at which "mffsx" was compiled
// <N> starts at 0 and increases by one on every call. A file that cannot be opened is
// skipped rather than written through a null handle.
void PPCTables::LogCompiledInstructions()
{
	static int time = 0;

	FILE *f = fopen(StringFromFormat("inst_log%i.txt", time).c_str(), "w");
	if (f)
	{
		for (int i = 0; i < m_numInstructions; i++)
		{
			if (m_allInstructions[i]->compileCount > 0) {
				fprintf(f, "%s\t%i\t%i\t%08x\n", m_allInstructions[i]->opname, m_allInstructions[i]->compileCount, m_allInstructions[i]->runCount, m_allInstructions[i]->lastUse);
			}
		}
		fclose(f);
	}

	f = fopen(StringFromFormat("inst_not%i.txt", time).c_str(), "w");
	if (f)
	{
		for (int i = 0; i < m_numInstructions; i++)
		{
			if (m_allInstructions[i]->compileCount == 0) {
				fprintf(f, "%s\t%i\t%i\n", m_allInstructions[i]->opname, m_allInstructions[i]->compileCount, m_allInstructions[i]->runCount);
			}
		}
		fclose(f);
	}

	// This file name carries no counter, so it is overwritten on every call.
	f = fopen("rsp_at.txt", "w");
	if (f)
	{
		for (size_t i = 0; i < rsplocations.size(); i++) {
			fprintf(f, "mffsx: %08x\n", rsplocations[i]);
		}
		fclose(f);
	}

	time++;
}

View File

@ -73,6 +73,8 @@ struct GekkoOPInfo
int flags;
int numCyclesMinusOne;
int runCount;
int compileCount;
u32 lastUse;
};
@ -92,6 +94,7 @@ public:
static void CountInstruction(UGeckoInstruction _inst);
static void PrintInstructionRunCounts();
static void LogCompiledInstructions();
static void CompileInstruction(UGeckoInstruction _inst);
};

View File

@ -63,6 +63,7 @@ files = ["Console.cpp",
"PowerPC/Interpreter/Interpreter_FloatingPoint.cpp",
"PowerPC/Interpreter/Interpreter_Paired.cpp",
"PowerPC/Interpreter/Interpreter_LoadStore.cpp",
"PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp",
"PowerPC/Interpreter/Interpreter_SystemRegisters.cpp",
"PowerPC/Jit64/Jit.cpp",
"PowerPC/Jit64/JitCore.cpp",

View File

@ -46,6 +46,7 @@
#include "Debugger/PPCDebugInterface.h"
#include "Debugger/Debugger_SymbolMap.h"
#include "PowerPC/PPCAnalyst.h"
#include "PowerPC/PPCTables.h"
#include "PowerPC/Jit64/Jit.h"
#include "PowerPC/Jit64/JitCache.h"
@ -71,6 +72,9 @@ BEGIN_EVENT_TABLE(CCodeWindow, wxFrame)
EVT_MENU(IDM_SCANFUNCTIONS, CCodeWindow::OnSymbolsMenu)
EVT_MENU(IDM_LOADMAPFILE, CCodeWindow::OnSymbolsMenu)
EVT_MENU(IDM_SAVEMAPFILE, CCodeWindow::OnSymbolsMenu)
EVT_MENU(IDM_CLEARCODECACHE, CCodeWindow::OnJitMenu)
EVT_MENU(IDM_LOGINSTRUCTIONS, CCodeWindow::OnJitMenu)
// toolbar
EVT_MENU(IDM_DEBUG_GO, CCodeWindow::OnCodeStep)
EVT_MENU(IDM_STEP, CCodeWindow::OnCodeStep)
@ -244,6 +248,7 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam
{
wxMenu *pJitMenu = new wxMenu;
pJitMenu->Append(IDM_CLEARCODECACHE, _T("&Clear code cache"));
pJitMenu->Append(IDM_LOGINSTRUCTIONS, _T("&Log JIT instruction coverage"));
pMenuBar->Append(pJitMenu, _T("&JIT"));
}
@ -275,6 +280,9 @@ void CCodeWindow::OnJitMenu(wxCommandEvent& event)
case IDM_CLEARCODECACHE:
Jit64::ClearCache();
break;
case IDM_LOGINSTRUCTIONS:
PPCTables::LogCompiledInstructions();
break;
}
}

View File

@ -80,6 +80,7 @@ class CCodeWindow
IDM_BREAKPOINTWINDOW,
IDM_MEMORYWINDOW,
IDM_SCANFUNCTIONS,
IDM_LOGINSTRUCTIONS,
IDM_LOADMAPFILE,
IDM_SAVEMAPFILE,
IDM_CLEARCODECACHE,

View File

@ -303,7 +303,7 @@ void CFrame::OnOpen(wxCommandEvent& WXUNUSED (event))
wxEmptyString, wxEmptyString, wxEmptyString,
wxString::Format
(
_T("Elf files (*.elf)|*.elf|DOL files (*.dol)|*.dol|Gamecube/Wii ISO (*.iso;*.gcm)|*.iso;*.gcm|All files (%s)|%s"),
_T("All GC/Wii files (elf, dol, gcm, iso)|*.elf;*.dol;*.gcm;*.iso|All files (%s)|%s"),
wxFileSelectorDefaultWildcardStr,
wxFileSelectorDefaultWildcardStr
),

View File

@ -68,7 +68,7 @@ class CGameListCtrl : public wxListCtrl
void OnEditPatchFile(wxCommandEvent& event);
void OnOpenContainingFolder(wxCommandEvent& event);
virtual bool MSWDrawSubItem(wxPaintDC& rPainDC, int item, int subitem);
virtual bool MSWDrawSubItem(wxPaintDC& rPaintDC, int item, int subitem);
void AutomaticColumnWidth();
};

View File

@ -83,6 +83,7 @@ void Mixer_PushSamples(short *buffer, int num_stereo_samples, int sample_rate) {
static int PV1r=0,PV2r=0,PV3r=0,PV4r=0;
static int acc=0;
if (!GetAsyncKeyState(VK_TAB)) {
while (queue_size > queue_maxlength / 2) {
#ifdef _WIN32
DSound::DSound_UpdateSound();
@ -91,7 +92,9 @@ void Mixer_PushSamples(short *buffer, int num_stereo_samples, int sample_rate) {
sleep(0);
#endif
}
} else {
return;
}
//convert into config option?
const int mode = 2;

View File

@ -210,16 +210,6 @@ bool FifoCommandRunnable(void)
void Decode(void)
{
static int DecoderCount = 0;
DecoderCount++;
if (DecoderCount == 0x0019c601)
{
int i = 0;
}
// 0x0019c603 <- error
int Cmd = g_pDataReader->Read8();
switch(Cmd)
{
@ -236,7 +226,6 @@ void Decode(void)
case GX_LOAD_XF_REG:
{
u32 test = PeekFifo32(0);
u32 Cmd2 = g_pDataReader->Read32();
int dwTransferSize = ((Cmd2>>16)&15) + 1;

View File

@ -145,23 +145,26 @@ bool FifoCommandRunnable(void)
case GX_LOAD_XF_REG:
{
// check if we can read the header
if (iBufferSize >= 5) {
if (iBufferSize >= 5)
{
iCommandSize = 1 + 4;
u32 Cmd2 = PeekFifo32(1);
int dwTransferSize = ((Cmd2>>16)&15) + 1;
int dwTransferSize = ((Cmd2 >> 16) & 15) + 1;
iCommandSize += dwTransferSize * 4;
}
else {
else
{
return false;
}
}
break;
default:
if (Cmd&0x80)
if (Cmd & 0x80)
{
// check if we can read the header
if (iBufferSize >= 3) {
if (iBufferSize >= 3)
{
iCommandSize = 1 + 2;
u16 numVertices = PeekFifo16(1);
VertexLoader& vtxLoader = g_VertexLoaders[Cmd & GX_VAT_MASK];
@ -189,7 +192,7 @@ bool FifoCommandRunnable(void)
if (iCommandSize > iBufferSize)
return false;
INFO_LOG("OP detected: Cmd 0x%x size %i buffer %i",Cmd, iCommandSize, iBufferSize);
// INFO_LOG("OP detected: Cmd 0x%x size %i buffer %i",Cmd, iCommandSize, iBufferSize);
return true;
}
@ -268,7 +271,7 @@ void Decode(void)
{
// load vertices
u16 numVertices = g_pDataReader->Read16();
if( numVertices > 0 ) {
if (numVertices > 0) {
g_VertexLoaders[Cmd & GX_VAT_MASK].RunVertices((Cmd & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices);
}
}