From 52b52631c2d241cb0eddebb85d3b0473955bab87 Mon Sep 17 00:00:00 2001 From: Techjar Date: Sun, 26 Jan 2020 21:38:51 -0500 Subject: [PATCH 1/4] x64CPUDetect: Add detection for AMD Zen architecture --- Source/Core/Common/CPUDetect.h | 1 + Source/Core/Common/x64CPUDetect.cpp | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Source/Core/Common/CPUDetect.h b/Source/Core/Common/CPUDetect.h index 4e619c5b8a..7ecc3e00cd 100644 --- a/Source/Core/Common/CPUDetect.h +++ b/Source/Core/Common/CPUDetect.h @@ -54,6 +54,7 @@ struct CPUInfo bool bLAHFSAHF64 = false; bool bLongMode = false; bool bAtom = false; + bool bZen = false; // ARMv8 specific bool bFP = false; diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index b66ea45cfa..0c3decba05 100644 --- a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -118,6 +118,9 @@ void CPUInfo::Detect() (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D)) bAtom = true; + // Detect AMD Zen (all models) + if (family == 23) + bZen = true; logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; ht = (cpu_id[3] >> 28) & 1; From 185e49d2a99c9edd8adca92d45dd5ba02e9892de Mon Sep 17 00:00:00 2001 From: Techjar Date: Sun, 26 Jan 2020 22:08:55 -0500 Subject: [PATCH 2/4] x64CPUDetect: Add flag for slow PDEP/PEXT on AMD Zen For some unknown reason, PDEP and PEXT are ridiculously slow on the AMD Zen architecture.
--- Source/Core/Common/CPUDetect.h | 3 +++ Source/Core/Common/x64CPUDetect.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/Source/Core/Common/CPUDetect.h b/Source/Core/Common/CPUDetect.h index 7ecc3e00cd..f789ba41a2 100644 --- a/Source/Core/Common/CPUDetect.h +++ b/Source/Core/Common/CPUDetect.h @@ -42,6 +42,9 @@ struct CPUInfo bool bAVX2 = false; bool bBMI1 = false; bool bBMI2 = false; + // PDEP and PEXT are ridiculously slow on AMD Zen, so we have this flag to avoid using them there + // Zen 2 is also affected by this issue + bool bFastBMI2 = false; bool bFMA = false; bool bFMA4 = false; bool bAES = false; diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index 0c3decba05..338c74b808 100644 --- a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -175,6 +175,7 @@ void CPUInfo::Detect() } bFlushToZero = bSSE; + bFastBMI2 = bBMI2 && !bZen; if (max_ex_fn >= 0x80000004) { From a106c99826e358f7d297a1866209e5e446090cd5 Mon Sep 17 00:00:00 2001 From: Techjar Date: Sun, 26 Jan 2020 17:59:56 -0500 Subject: [PATCH 3/4] Jit64: Don't use PEXT in DoubleToSingle on AMD Zen This was causing severe slowdown in some games. 
--- Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index ab06681a88..57fb829688 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -68,7 +68,7 @@ void CommonAsmRoutines::GenConvertDoubleToSingle() // Don't Denormalize - if (cpu_info.bBMI2) + if (cpu_info.bFastBMI2) { // Extract bits 0-1 and 5-34 MOV(64, R(RSCRATCH), Imm64(0xc7ffffffe0000000)); From 3a3dc28d54ed2890edcf72b6cdbb602afce4a323 Mon Sep 17 00:00:00 2001 From: Techjar Date: Sun, 26 Jan 2020 21:42:56 -0500 Subject: [PATCH 4/4] VertexLoaderX64: Don't use PDEP on AMD Zen --- Source/Core/VideoCommon/VertexLoaderX64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index c29981b9ec..96832e3449 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -284,7 +284,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format) // RRRRRGGG GGGBBBBB // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR LoadAndSwap(16, scratch1, data); - if (cpu_info.bBMI1 && cpu_info.bBMI2) + if (cpu_info.bBMI1 && cpu_info.bFastBMI2) { MOV(32, R(scratch2), Imm32(0x07C3F7C0)); PDEP(32, scratch3, scratch1, R(scratch2)); @@ -324,7 +324,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format) // RRRRGGGG BBBBAAAA // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR LoadAndSwap(16, scratch1, data); - if (cpu_info.bBMI2) + if (cpu_info.bFastBMI2) { MOV(32, R(scratch2), Imm32(0x0F0F0F0F)); PDEP(32, scratch1, scratch1, R(scratch2)); @@ -353,7 +353,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format) // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR data.AddMemOffset(-1); // subtract one from 
address so we can use a 32bit load and bswap LoadAndSwap(32, scratch1, data); - if (cpu_info.bBMI2) + if (cpu_info.bFastBMI2) { MOV(32, R(scratch2), Imm32(0xFCFCFCFC)); PDEP(32, scratch1, scratch1, R(scratch2));