Jit64[IL]: fix float conversions

Floating-point is complicated... Some background: Denormals are floats that are too close to zero to be stored in a normalized way (their exponent would need more bits). Since they are stored unnormalized, they are hard to work with, even in hardware. That's why both PowerPC and SSE can be configured to operate in faster but non-standard-conpliant modes in which these numbers are simply rounded ('flushed') to zero. Internally, we do the same as the PowerPC CPU and store all floats in double format. This means that for loading and storing singles we need a conversion. The PowerPC CPU does this in hardware. We previously did this using CVTSS2SD/CVTSD2SS. Unfortunately, these instructions are considered arithmetic and therefore flush denormals to zero if non-IEEE mode is active. This normally wouldn't be a problem since the next arithmetic floating-point instruction would do the same anyway but as it turns out some games actually use floating-point instructions for copying arbitrary data. My idea for fixing this problem was to use x87 instructions since the x87 FPU never supported flush-to-zero and thus doesn't mangle denormals. However, there is one more problem to deal with: SNaNs are automatically converted to QNaNs (by setting the most-significant bit of the fraction). I opted to fix this by manually resetting the QNaN bit of all values with all-1s exponent.
2025-07-23 14:19:46 -06:00 · 2014-02-03 23:56:11 +01:00
parent c25c4a6e20
commit db196d8c5b
8 changed files with 173 additions and 37 deletions
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@ -1437,7 +1437,19 @@ void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {
 	Write8(0x0f);
 	Write8(0x38);
 	Write8(0x00);
-	arg.WriteRest(this, 0);
+	arg.WriteRest(this);
+}
+
+void XEmitter::PTEST(X64Reg dest, OpArg arg) {
+	if (!cpu_info.bSSE4_1) {
+		PanicAlert("Trying to use PTEST on a system that doesn't support it. Nobody hears your screams.");
+	}
+	Write8(0x66);
+	Write8(0x0f);
+	Write8(0x38);
+	Write8(0x17);
+	arg.operandReg = dest;
+	arg.WriteRest(this);
 }

 void XEmitter::PAND(X64Reg dest, OpArg arg)     {WriteSSEOp(64, 0xDB, true, dest, arg);}
@ -1497,6 +1509,8 @@ void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(64,
 void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(64, sseMUL, false, regOp1, regOp2, arg);}
 void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(64, sseDIV, false, regOp1, regOp2, arg);}
 void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)  {WriteAVXOp(64, sseSQRT, false, regOp1, regOp2, arg);}
+void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg)    {WriteAVXOp(64, sseAND, false, regOp1, regOp2, arg);}
+void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(64, sseANDN, false, regOp1, regOp2, arg);}

 // Prefixes

@ -1526,6 +1540,7 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, OpArg arg)
 void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, src);}
 void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, dest);}
 void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, dest);}
+void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }

 void XEmitter::RTDSC() { Write8(0x0F); Write8(0x31); }

--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@ -433,9 +433,27 @@ public:
 	void REPNE();

 	// x87
+	enum x87StatusWordBits {
+		x87_InvalidOperation = 0x1,
+		x87_DenormalizedOperand = 0x2,
+		x87_DivisionByZero = 0x4,
+		x87_Overflow = 0x8,
+		x87_Underflow = 0x10,
+		x87_Precision = 0x20,
+		x87_StackFault = 0x40,
+		x87_ErrorSummary = 0x80,
+		x87_C0 = 0x100,
+		x87_C1 = 0x200,
+		x87_C2 = 0x400,
+		x87_TopOfStack = 0x2000 | 0x1000 | 0x800,
+		x87_C3 = 0x4000,
+		x87_FPUBusy = 0x8000,
+	};
+
 	void FLD(int bits, OpArg src);
 	void FST(int bits, OpArg dest);
 	void FSTP(int bits, OpArg dest);
+	void FNSTSW_AX();
 	void FWAIT();

 	// SSE/SSE2: Floating point arithmetic
@ -562,6 +580,7 @@ public:
 	void PUNPCKLWD(X64Reg dest, const OpArg &arg);
 	void PUNPCKLDQ(X64Reg dest, const OpArg &arg);

+	void PTEST(X64Reg dest, OpArg arg);
 	void PAND(X64Reg dest, OpArg arg);
 	void PANDN(X64Reg dest, OpArg arg);
 	void PXOR(X64Reg dest, OpArg arg);
@ -631,6 +650,8 @@ public:
 	void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+	void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+	void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);

 	void RTDSC();

--- a/Source/Core/Common/x64FPURoundMode.cpp
+++ b/Source/Core/Common/x64FPURoundMode.cpp
@ -16,11 +16,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
 #endif

 // OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
-const u32 EXCEPTION_MASK = 0x1F80;
+static const u32 EXCEPTION_MASK = 0x1F80;
 // Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
-const u32 DAZ = 0x40;
+static const u32 DAZ = 0x40;
 // Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
-const u32 FTZ = 0x8000;
+static const u32 FTZ = 0x8000;

 namespace FPURoundMode
 {