mirror of
https://github.com/Ryujinx-NX/Ryujinx.git
synced 2024-11-14 21:17:43 -07:00
17620d18db
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection
  Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as short-hands for `F+VL` and `F+VL+DQ`.
* ARMeilleure: Add initial support for EVEX instruction encoding
  Does not implement rounding or exception controls.
* ARMeilleure: Add `X86Vpternlogd`
  Accelerates the vector-`Not` instruction.
* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}
* ARMeilleure: Add check for `XCR0` flags
  Add XCR0 register checks for AVX and AVX512F, following the guidelines from sections 14.3 and 15.2 of the Intel Architecture Software Developer's Manual.
* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting
* ARMeilleure: Move XCR0 procedure to GetXcr0Eax
* ARMeilleure: Add `XCR0` to `FeatureInfo` structure
* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly
  Avoids an additional allocation
* ARMeilleure: Formatting fixes
* ARMeilleure: Fix EVEX encoding src2 register index
  > Just like in VEX prefix, vvvv is provided in inverted form.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`
  Passes unit tests, verified instruction utilization
* ARMeilleure: Fix EVEX register operand designations
  Operand 2 was being sourced improperly. EVEX encoded instructions source their operands like so:
    Operand 1: ModRM:reg
    Operand 2: EVEX.vvvvv
    Operand 3: ModRM:r/m
    Operand 4: Imm
  This fixes the improper register designations when emitting vpternlog. Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`
* ARMeilleure: PTC version bump
* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail
* ARMeilleure: Update EVEX encoding comment capitalization
69 lines
4.5 KiB
C#
69 lines
4.5 KiB
C#
using System.Runtime.Intrinsics.Arm;
|
|
|
|
namespace ARMeilleure
{
    using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
    using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;

    /// <summary>
    /// Static switchboard of optimization flags.
    /// </summary>
    /// <remarks>
    /// Every public <c>Use*IfAvailable</c> property is a settable request that defaults to <c>true</c>.
    /// Every internal <c>Use*</c> property is computed on each read: it is <c>true</c> only when the
    /// matching request is set and the corresponding <c>HardwareCapabilities.Supports*</c> value is
    /// <c>true</c>. The AVX and AVX-512 flags additionally require <see cref="ForceLegacySse"/> to be
    /// <c>false</c>.
    /// </remarks>
    public static class Optimizations
    {
        // ---- General switches (all default to true; their consumers live outside this file) ----

        public static bool FastFP { get; set; } = true;

        public static bool AllowLcqInFunctionTable { get; set; } = true;
        public static bool UseUnmanagedDispatchLoop { get; set; } = true;

        // ---- Arm64 feature requests ----

        public static bool UseAdvSimdIfAvailable { get; set; } = true;
        public static bool UseArm64PmullIfAvailable { get; set; } = true;

        // ---- x86 feature requests ----

        public static bool UseSseIfAvailable { get; set; } = true;
        public static bool UseSse2IfAvailable { get; set; } = true;
        public static bool UseSse3IfAvailable { get; set; } = true;
        public static bool UseSsse3IfAvailable { get; set; } = true;
        public static bool UseSse41IfAvailable { get; set; } = true;
        public static bool UseSse42IfAvailable { get; set; } = true;
        public static bool UsePopCntIfAvailable { get; set; } = true;
        public static bool UseAvxIfAvailable { get; set; } = true;
        public static bool UseAvx512FIfAvailable { get; set; } = true;
        public static bool UseAvx512VlIfAvailable { get; set; } = true;
        public static bool UseAvx512BwIfAvailable { get; set; } = true;
        public static bool UseAvx512DqIfAvailable { get; set; } = true;
        public static bool UseF16cIfAvailable { get; set; } = true;
        public static bool UseFmaIfAvailable { get; set; } = true;
        public static bool UseAesniIfAvailable { get; set; } = true;
        public static bool UsePclmulqdqIfAvailable { get; set; } = true;
        public static bool UseShaIfAvailable { get; set; } = true;
        public static bool UseGfniIfAvailable { get; set; } = true;

        /// <summary>
        /// Pass-through to <c>X86HardwareCapabilities.ForceLegacySse</c>; this class keeps no copy of
        /// the value. While it is <c>true</c>, the AVX and AVX-512 flags below evaluate to <c>false</c>.
        /// </summary>
        public static bool ForceLegacySse
        {
            get
            {
                return X86HardwareCapabilities.ForceLegacySse;
            }
            set
            {
                X86HardwareCapabilities.ForceLegacySse = value;
            }
        }

        // ---- Effective Arm64 flags: request AND reported hardware support ----

        internal static bool UseAdvSimd =>
            UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;

        internal static bool UseArm64Pmull =>
            UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;

        // ---- Effective x86 flags: request AND reported hardware support ----

        internal static bool UseSse =>
            UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;

        internal static bool UseSse2 =>
            UseSse2IfAvailable && X86HardwareCapabilities.SupportsSse2;

        internal static bool UseSse3 =>
            UseSse3IfAvailable && X86HardwareCapabilities.SupportsSse3;

        internal static bool UseSsse3 =>
            UseSsse3IfAvailable && X86HardwareCapabilities.SupportsSsse3;

        internal static bool UseSse41 =>
            UseSse41IfAvailable && X86HardwareCapabilities.SupportsSse41;

        internal static bool UseSse42 =>
            UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;

        internal static bool UsePopCnt =>
            UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;

        // The AVX family carries one extra condition: ForceLegacySse must be off.

        internal static bool UseAvx =>
            UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;

        internal static bool UseAvx512F =>
            UseAvx512FIfAvailable && X86HardwareCapabilities.SupportsAvx512F && !ForceLegacySse;

        internal static bool UseAvx512Vl =>
            UseAvx512VlIfAvailable && X86HardwareCapabilities.SupportsAvx512Vl && !ForceLegacySse;

        internal static bool UseAvx512Bw =>
            UseAvx512BwIfAvailable && X86HardwareCapabilities.SupportsAvx512Bw && !ForceLegacySse;

        internal static bool UseAvx512Dq =>
            UseAvx512DqIfAvailable && X86HardwareCapabilities.SupportsAvx512Dq && !ForceLegacySse;

        internal static bool UseF16c =>
            UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;

        internal static bool UseFma =>
            UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;

        internal static bool UseAesni =>
            UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;

        internal static bool UsePclmulqdq =>
            UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;

        internal static bool UseSha =>
            UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;

        internal static bool UseGfni =>
            UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;

        // ---- Combined AVX-512 short-hands ----

        /// <summary>Short-hand for AVX-512 F + VL both being usable.</summary>
        internal static bool UseAvx512Ortho =>
            UseAvx512F && UseAvx512Vl;

        /// <summary>Short-hand for AVX-512 F + VL + DQ all being usable.</summary>
        internal static bool UseAvx512OrthoFloat =>
            UseAvx512Ortho && UseAvx512Dq;
    }
}
|