Common: Use function-level targeting for CRC32 (SSE4.2)

2025-07-25 07:09:48 -06:00 · 2016-12-23 23:48:55 +10:00
parent b74029ec57
commit 0f978227f5
1 changed files with 44 additions and 13 deletions
--- a/Source/Core/Common/Hash.cpp
+++ b/Source/Core/Common/Hash.cpp
@ -238,9 +238,11 @@ u64 GetMurmurHash3(const u8* src, u32 len, u32 samples)
 }

 // CRC32 hash using the SSE4.2 instruction
+#if defined(_M_X86_64)
+
+FUNCTION_TARGET_SSE42
 u64 GetCRC32(const u8* src, u32 len, u32 samples)
 {
-#if _M_SSE >= 0x402 || defined(_M_ARM_64)
  u64 h[4] = {len, 0, 0, 0};
  u32 Step = (len / 8);
  const u64* data = (const u64*)src;
@ -250,9 +252,7 @@ u64 GetCRC32(const u8* src, u32 len, u32 samples)
  Step = Step / samples;
  if (Step < 1)
    Step = 1;
-#endif

-#if _M_SSE >= 0x402
  while (data < end - Step * 3)
  {
    h[0] = _mm_crc32_u64(h[0], data[Step * 0]);
@ -274,7 +274,25 @@ u64 GetCRC32(const u8* src, u32 len, u32 samples)
    memcpy(&temp, end, len & 7);
    h[0] = _mm_crc32_u64(h[0], temp);
  }
+
+  // FIXME: is there a better way to combine these partial hashes?
+  return h[0] + (h[1] << 10) + (h[2] << 21) + (h[3] << 32);
+}
+
 #elif defined(_M_ARM_64)
+
+u64 GetCRC32(const u8* src, u32 len, u32 samples)
+{
+  u64 h[4] = {len, 0, 0, 0};
+  u32 Step = (len / 8);
+  const u64* data = (const u64*)src;
+  const u64* end = data + Step;
+  if (samples == 0)
+    samples = std::max(Step, 1u);
+  Step = Step / samples;
+  if (Step < 1)
+    Step = 1;
+
  // We should be able to use intrinsics for this
  // Too bad the intrinsics for this instruction was added in GCC 4.9.1
  // The Android NDK (as of r10e) only has GCC 4.9
@ -317,16 +335,20 @@ u64 GetCRC32(const u8* src, u32 len, u32 samples)
        : [res] "=r"(h[0])
        : [two] "r"(h[0]), [three] "r"(temp));
  }
-#endif

-#if _M_SSE >= 0x402 || defined(_M_ARM_64)
  // FIXME: is there a better way to combine these partial hashes?
  return h[0] + (h[1] << 10) + (h[2] << 21) + (h[3] << 32);
-#else
-  return 0;
-#endif
 }

+#else
+
+u64 GetCRC32(const u8* src, u32 len, u32 samples)
+{
+  return 0;
+}
+
+#endif
+
 /*
 * NOTE: This hash function is used for custom texture loading/dumping, so
 * it should not be changed, which would require all custom textures to be
@ -386,10 +408,13 @@ u64 GetHashHiresTexture(const u8* src, u32 len, u32 samples)
  return h;
 }
 #else
+
 // CRC32 hash using the SSE4.2 instruction
+#if defined(_M_X86)
+
+FUNCTION_TARGET_SSE42
 u64 GetCRC32(const u8* src, u32 len, u32 samples)
 {
-#if _M_SSE >= 0x402
  u32 h = len;
  u32 Step = (len / 4);
  const u32* data = (const u32*)src;
@ -407,11 +432,17 @@ u64 GetCRC32(const u8* src, u32 len, u32 samples)

  const u8* data2 = (const u8*)end;
  return (u64)_mm_crc32_u32(h, u32(data2[0]));
-#else
-  return 0;
-#endif
 }

+#else
+
+u64 GetCRC32(const u8* src, u32 len, u32 samples)
+{
+  return 0;
+}
+
+#endif
+
 //-----------------------------------------------------------------------------
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
@ -606,7 +637,7 @@ u64 GetHash64(const u8* src, u32 len, u32 samples)
 // sets the hash function used for the texture cache
 void SetHash64Function()
 {
-#if _M_SSE >= 0x402
+#if defined(_M_X86_64) || defined(_M_X86)
  if (cpu_info.bSSE4_2)  // sse crc32 version
  {
    ptrHashFunction = &GetCRC32;