JitArm64: Fix fmul rounding issues

This is a port of 4f18f60 to JitArm64.
This commit is contained in:
JosJuice
2021-05-15 15:41:28 +02:00
parent 66e912a252
commit 11be2314fe
5 changed files with 155 additions and 20 deletions

View File

@ -2294,6 +2294,15 @@ void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode,
(opcode << 12) | (1 << 11) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
ARM64Reg Rm)
{
ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "%s only supports double and single registers!", __func__);
Write32((1 << 30) | (U << 29) | (0b11110001 << 21) | (size << 22) | (DecodeReg(Rm) << 16) |
(opcode << 11) | (1 << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
ARM64Reg Rm)
{
@ -3103,6 +3112,11 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
}
// Scalar - 2 Source
void ARM64FloatEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
ASSERT_MSG(DYNA_REC, IsDouble(Rd), "%s only supports double registers!", __func__);
EmitScalarThreeSame(0, 3, 0b10000, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
@ -3174,10 +3188,18 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8)
}
// Vector
void ARM64FloatEmitter::ADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(0, size >> 6, 0b10000, Rd, Rn, Rm);
}
void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(0, 0, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(0, 1, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
@ -3285,6 +3307,10 @@ void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(0, 2, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(0, 3, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 1, Rd, Rn);
@ -3864,11 +3890,10 @@ void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift)
EncodeModImm(Q, op, cmode, 0, Rd, abcdefgh);
}
void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift)
void ARM64FloatEmitter::ORR_BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift, u8 op)
{
bool Q = IsQuad(Rd);
u8 cmode = 1;
u8 op = 1;
if (size == 16)
{
ASSERT_MSG(DYNA_REC, shift == 0 || shift == 8, "%s(size16) only supports shift of {0, 8}!",
@ -3904,6 +3929,16 @@ void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift)
EncodeModImm(Q, op, cmode, 0, Rd, imm);
}
void ARM64FloatEmitter::ORR(u8 size, ARM64Reg Rd, u8 imm, u8 shift)
{
ORR_BIC(size, Rd, imm, shift, 0);
}
void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift)
{
ORR_BIC(size, Rd, imm, shift, 1);
}
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
{
bool bundled_loadstore = false;

View File

@ -998,6 +998,7 @@ public:
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
// Scalar - 2 Source
void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@ -1018,7 +1019,9 @@ public:
void FMOV(ARM64Reg Rd, uint8_t imm8);
// Vector
void ADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@ -1041,6 +1044,7 @@ public:
void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void NOT(ARM64Reg Rd, ARM64Reg Rn);
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MOV(ARM64Reg Rd, ARM64Reg Rn) { ORR(Rd, Rn, Rn); }
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@ -1126,6 +1130,7 @@ public:
// Modified Immediate
void MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift = 0);
void ORR(u8 size, ARM64Reg Rd, u8 imm, u8 shift = 0);
void BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift = 0);
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = ARM64Reg::INVALID_REG,
@ -1143,6 +1148,7 @@ private:
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
ARM64Reg Rm);
void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
@ -1175,6 +1181,8 @@ private:
void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh);
void ORR_BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift, u8 op);
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);