Ryujinx/ARMeilleure/Instructions/InstEmitSimdCvt32.cs

using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using System.Reflection;

using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    static partial class InstEmit32
    {
        private static int FlipVdBits(int vd, bool lowBit)
        {
            if (lowBit)
            {
                // Move the low bit to the top.
                return ((vd & 0x1) << 4) | (vd >> 1);
            }
            else
            {
                // Move the high bit to the bottom.
                return ((vd & 0xf) << 1) | (vd >> 4);
            }
        }

        private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
        {
            MethodInfo info;

            if (op1.Type == OperandType.FP64)
            {
                info = unsigned
                    ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
                    : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
            }
            else
            {
                info = unsigned
                    ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
                    : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
            }

            return context.Call(info, op1);
        }

        public static void Vcvt_V(ArmEmitterContext context)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            bool unsigned = (op.Opc & 1) != 0;
            bool toInteger = (op.Opc & 2) != 0;
            OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;

            if (toInteger)
            {
                if (Optimizations.UseSse41)
                {
                    EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
                }
                else
                {
                    EmitVectorUnaryOpF32(context, (op1) =>
                    {
                        return EmitSaturateFloatToInt(context, op1, unsigned);
                    });
                }
            }
            else
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (n) =>
                    {
                        if (unsigned)
                        {
                            Operand mask = X86GetAllElements(context, 0x47800000);

                            Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
                            res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
                            res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);

                            Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
                            res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
                            res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);

                            return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
                        }
                        else
                        {
                            return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
                        }
                    });
                }
                else
                {
                    if (unsigned)
                    {
                        EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
                    }
                    else
                    {
                        EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
                    }
                }
            }
        }

        public static void Vcvt_FD(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            int vm = op.Vm;
            int vd;
            if (op.Size == 3)
            {
                vd = FlipVdBits(op.Vd, false);
                // Double to single.
                Operand fp = ExtractScalar(context, OperandType.FP64, vm);

                Operand res = context.ConvertToFP(OperandType.FP32, fp);

                InsertScalar(context, vd, res);
            }
            else
            {
                vd = FlipVdBits(op.Vd, true);
                // Single to double.
                Operand fp = ExtractScalar(context, OperandType.FP32, vm);

                Operand res = context.ConvertToFP(OperandType.FP64, fp);

                InsertScalar(context, vd, res);
            }
        }

        // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
        public static void Vcvt_FI(ArmEmitterContext context)
        {
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

            bool toInteger = (op.Opc2 & 0b100) != 0;

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

            if (toInteger)
            {
                bool unsigned = (op.Opc2 & 1) == 0;
                bool roundWithFpscr = op.Opc != 1;

                if (!roundWithFpscr && Optimizations.UseSse41)
                {
                    EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
                }
                else
                {
                    Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

                    // TODO: Fast Path.
                    if (roundWithFpscr)
                    {
                        toConvert = EmitRoundByRMode(context, toConvert);
                    }

                    // Round towards zero.
                    Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);

                    InsertScalar(context, op.Vd, asInteger);
                }
            }
            else
            {
                bool unsigned = op.Opc == 0;

                Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);

                Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);

                InsertScalar(context, op.Vd, asFloat);
            }
        }

        private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
        {
            IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;

            string name = nameof(Math.Round);

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(MathF).GetMethod(name, new Type[] { typeof(float),  typeof(MidpointRounding) })
                : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });

            return context.Call(info, n, Const((int)roundMode));
        }

        private static FPRoundingMode RMToRoundMode(int rm)
        {
            FPRoundingMode roundMode;
            switch (rm)
            {
                case 0b00:
                    roundMode = FPRoundingMode.ToNearestAway;
                    break;
                case 0b01:
                    roundMode = FPRoundingMode.ToNearest;
                    break;
                case 0b10:
                    roundMode = FPRoundingMode.TowardsPlusInfinity;
                    break;
                case 0b11:
                    roundMode = FPRoundingMode.TowardsMinusInfinity;
                    break;
                default:
                    throw new ArgumentOutOfRangeException(nameof(rm));
            }
            return roundMode;
        }

        // VCVTA/M/N/P (floating-point).
        public static void Vcvt_RM(ArmEmitterContext context)
        {
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

            bool unsigned = op.Opc == 0;
            int rm = op.Opc2 & 3;

            if (Optimizations.UseSse41)
            {
                EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
            }
            else
            {
                Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

                switch (rm)
                {
                    case 0b00: // Away
                        toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
                        break;
                    case 0b01: // Nearest
                        toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
                        break;
                    case 0b10: // Towards positive infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
                        break;
                    case 0b11: // Towards negative infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
                        break;
                }

                Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);

                InsertScalar(context, op.Vd, asInteger);
            }
        }

        // VRINTA/M/N/P (floating-point).
        public static void Vrint_RM(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

            int rm = op.Opc2 & 3;

            if (Optimizations.UseSse41)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
                    FPRoundingMode roundMode = RMToRoundMode(rm);

                    if (roundMode != FPRoundingMode.ToNearestAway)
                    {
                        Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
                        return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
                    }
                    else
                    {
                        return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
                    }
                });
            }
            else
            {
                Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

                switch (rm)
                {
                    case 0b00: // Away
                        toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
                        break;
                    case 0b01: // Nearest
                        toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
                        break;
                    case 0b10: // Towards positive infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
                        break;
                    case 0b11: // Towards negative infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
                        break;
                }

                InsertScalar(context, op.Vd, toConvert);
            }
        }

        // VRINTA (vector).
        public static void Vrinta_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse41)
            {
                EmitVectorUnaryOpSimd32(context, (m) =>
                {
                    return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false);
                });
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
            }
        }

        // VRINTM (vector).
        public static void Vrintm_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                EmitVectorUnaryOpSimd32(context, (m) =>
                {
                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
                });
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
            }
        }

        // VRINTN (vector).
        public static void Vrintn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                EmitVectorUnaryOpSimd32(context, (m) =>
                {
                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
                });
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
            }
        }

        // VRINTP (vector).
        public static void Vrintp_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                EmitVectorUnaryOpSimd32(context, (m) =>
                {
                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
                });
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
            }
        }

        // VRINTZ (floating-point).
        public static void Vrint_Z(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
                    Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
                    return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
                });
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
            }
        }

        // VRINTX (floating-point).
        public static void Vrintx_S(ArmEmitterContext context)
        {
            EmitScalarUnaryOpF32(context, (op1) =>
            {
                return EmitRoundByRMode(context, op1);
            });
        }

        private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
        {
            Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);

            if (signed)
            {
                return context.ConvertToFP(type, value);
            }
            else
            {
                return context.ConvertToFPUI(type, value);
            }
        }

        private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            // A port of the similar round function in InstEmitSimdCvt.
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

            bool doubleSize = (op.Size & 1) != 0;
            int shift = doubleSize ? 1 : 2;
            Operand n = GetVecA32(op.Vm >> shift);
            n = EmitSwapScalar(context, n, op.Vm, doubleSize);

            if (!doubleSize)
            {
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                if (roundMode != FPRoundingMode.ToNearestAway)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
                }
                else
                {
                    nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
                }

                Operand zero = context.VectorZero();

                Operand nCmp;
                Operand nIntOrLong2 = default;

                if (!signed)
                {
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }

                int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)

                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);

                if (!signed)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);

                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
                }

                nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);

                Operand dRes;
                if (signed)
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
                }
                else
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
                    dRes = context.Add(dRes, nIntOrLong);
                }

                InsertScalar(context, op.Vd, dRes);
            }
            else
            {
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                if (roundMode != FPRoundingMode.ToNearestAway)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
                }
                else
                {
                    nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
                }

                Operand zero = context.VectorZero();

                Operand nCmp;
                Operand nIntOrLong2 = default;

                if (!signed)
                {
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }

                long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)

                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);

                if (!signed)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);

                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
                }

                nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
                nLong = context.ConvertI64ToI32(nLong);

                Operand dRes;
                if (signed)
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
                }
                else
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
                    dRes = context.Add(dRes, nIntOrLong);
                }

                InsertScalar(context, op.Vd, dRes);
            }
        }

        private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            EmitVectorUnaryOpSimd32(context, (n) =>
            {
                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                    nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));

                    Operand zero = context.VectorZero();
                    Operand nCmp;
                    if (!signed)
                    {
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }

                    Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)

                    Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    Operand nInt2 = default;

                    if (!signed)
                    {
                        nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);

                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                        nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    }

                    nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                    if (signed)
                    {
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
                    }
                    else
                    {
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
                    }
                }
                else /* if (sizeF == 1) */
                {
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                    nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));

                    Operand zero = context.VectorZero();
                    Operand nCmp;
                    if (!signed)
                    {
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }

                    Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)

                    Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    Operand nLong2 = default;

                    if (!signed)
                    {
                        nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);

                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                        nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    }

                    nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                    if (signed)
                    {
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
                    }
                    else
                    {
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
                    }
                }
            });
        }
    }
}