Merge pull request #2972 from lioncash/align

General: Replace GC_ALIGN macros with alignas
This commit is contained in:
flacs
2015-09-11 17:00:13 +00:00
19 changed files with 73 additions and 84 deletions

View File

@@ -22,16 +22,16 @@ namespace GPFifo
// 32 Byte gather pipe with extra space
// Overfilling is no problem (up to the real limit), CheckGatherPipe will blast the
// contents in nicely sized chunks
//
// Other optimizations to think about:
// If the gp is NOT linked to the fifo, just blast to memory byte by word
// If the gp IS linked to the fifo, use a fast wrapping buffer and skip writing to memory
// - If the GP is NOT linked to the FIFO, just blast to memory byte by word
// - If the GP IS linked to the FIFO, use a fast wrapping buffer and skip writing to memory
//
// Both of these should actually work! Only problem is that we have to decide at run time,
// the same function could use both methods. Compile 2 different versions of each such block?
u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
// More room for the fastmodes
alignas(32) u8 m_gatherPipe[GATHER_PIPE_SIZE * 16];
// pipe counter
u32 m_gatherPipeCount = 0;

View File

@@ -17,7 +17,8 @@ enum
GATHER_PIPE_SIZE = 32
};
extern u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
// More room for the fastmodes
alignas(32) extern u8 m_gatherPipe[GATHER_PIPE_SIZE * 16];
// pipe counter
extern u32 m_gatherPipeCount;

View File

@@ -10,12 +10,12 @@
using namespace Gen;
static const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x0000000000000000ULL};
static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
static const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL};
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
static const u64 GC_ALIGNED16(psGeneratedQNaN[2]) = {0x7FF8000000000000ULL, 0x7FF8000000000000ULL};
static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
alignas(16) static const u64 psSignBits[2] = {0x8000000000000000ULL, 0x0000000000000000ULL};
alignas(16) static const u64 psSignBits2[2] = {0x8000000000000000ULL, 0x8000000000000000ULL};
alignas(16) static const u64 psAbsMask[2] = {0x7FFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL};
alignas(16) static const u64 psAbsMask2[2] = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
alignas(16) static const u64 psGeneratedQNaN[2] = {0x7FF8000000000000ULL, 0x7FF8000000000000ULL};
alignas(16) static const double half_qnan_and_s32_max[2] = {0x7FFFFFFF, -0x80000};
X64Reg Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool packed, bool preserve_inputs, bool roundRHS)

View File

@@ -205,12 +205,12 @@ void CommonAsmRoutines::GenMfcr()
}
// Safe + Fast Quantizers, originally from JITIL by magumagu
static const float GC_ALIGNED16(m_65535[4]) = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
static const float GC_ALIGNED16(m_32767) = 32767.0f;
static const float GC_ALIGNED16(m_m32768) = -32768.0f;
static const float GC_ALIGNED16(m_255) = 255.0f;
static const float GC_ALIGNED16(m_127) = 127.0f;
static const float GC_ALIGNED16(m_m128) = -128.0f;
alignas(16) static const float m_65535[4] = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
alignas(16) static const float m_32767 = 32767.0f;
alignas(16) static const float m_m32768 = -32768.0f;
alignas(16) static const float m_255 = 255.0f;
alignas(16) static const float m_127 = 127.0f;
alignas(16) static const float m_m128 = -128.0f;
#define QUANTIZE_OVERFLOW_SAFE

View File

@@ -104,7 +104,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I)
}
static u64 SlotSet[1000];
static u8 GC_ALIGNED16(FSlotSet[16*1000]);
alignas(16) static u8 FSlotSet[16 * 1000];
static OpArg regLocForSlot(RegInfo& RI, unsigned slot)
{
@@ -760,7 +760,7 @@ static void regWriteExit(RegInfo& RI, InstLoc dest)
}
// Helper function to check floating point exceptions
static double GC_ALIGNED16(isSNANTemp[2][2]);
alignas(16) static double isSNANTemp[2][2];
static bool checkIsSNAN()
{
return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]);
@@ -1742,7 +1742,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break;
X64Reg reg = fregURegWithMov(RI, I);
static const u32 GC_ALIGNED16(ssSignBits[4]) = {0x80000000};
alignas(16) static const u32 ssSignBits[4] = {0x80000000};
Jit->PXOR(reg, M(ssSignBits));
RI.fregs[reg] = I;
fregNormalRegClear(RI, I);
@@ -1754,7 +1754,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break;
X64Reg reg = fregURegWithMov(RI, I);
static const u64 GC_ALIGNED16(sdSignBits[2]) = {0x8000000000000000ULL};
alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL};
Jit->PXOR(reg, M(sdSignBits));
RI.fregs[reg] = I;
fregNormalRegClear(RI, I);
@@ -1766,7 +1766,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break;
X64Reg reg = fregURegWithMov(RI, I);
static const u32 GC_ALIGNED16(psSignBits[4]) = {0x80000000, 0x80000000};
alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000};
Jit->PXOR(reg, M(psSignBits));
RI.fregs[reg] = I;
fregNormalRegClear(RI, I);

View File

@@ -4,10 +4,10 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
alignas(16) const u8 pbswapShuffle1x4[16] = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
alignas(16) const u8 pbswapShuffle2x4[16] = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
const float GC_ALIGNED16(m_quantizeTableS[]) =
alignas(16) const float m_quantizeTableS[] =
{
(1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1), (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3),
(1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5), (1ULL << 6), (1ULL << 6), (1ULL << 7), (1ULL << 7),
@@ -35,7 +35,7 @@ const float GC_ALIGNED16(m_quantizeTableS[]) =
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
};
const float GC_ALIGNED16(m_dequantizeTableS[]) =
alignas(16) const float m_dequantizeTableS[] =
{
1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3),
@@ -63,4 +63,4 @@ const float GC_ALIGNED16(m_dequantizeTableS[]) =
(1ULL << 4), (1ULL << 4), (1ULL << 3), (1ULL << 3), (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1),
};
const float GC_ALIGNED16(m_one[]) = { 1.0f, 0.0f, 0.0f, 0.0f };
alignas(16) const float m_one[] = { 1.0f, 0.0f, 0.0f, 0.0f };

View File

@@ -4,13 +4,13 @@
#pragma once
#include "Common/Common.h"
#include "Common/CommonTypes.h"
extern const u8 GC_ALIGNED16(pbswapShuffle1x4[16]);
extern const u8 GC_ALIGNED16(pbswapShuffle2x4[16]);
extern const float GC_ALIGNED16(m_one[]);
extern const float GC_ALIGNED16(m_quantizeTableS[]);
extern const float GC_ALIGNED16(m_dequantizeTableS[]);
alignas(16) extern const u8 pbswapShuffle1x4[16];
alignas(16) extern const u8 pbswapShuffle2x4[16];
alignas(16) extern const float m_one[];
alignas(16) extern const float m_quantizeTableS[];
alignas(16) extern const float m_dequantizeTableS[];
class CommonAsmRoutinesBase
{

View File

@@ -691,8 +691,8 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&,
}
}
static const u64 GC_ALIGNED16(psMantissaTruncate[2]) = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL};
static const u64 GC_ALIGNED16(psRoundBit[2]) = {0x8000000, 0x8000000};
alignas(16) static const u64 psMantissaTruncate[2] = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL};
alignas(16) static const u64 psRoundBit[2] = {0x8000000, 0x8000000};
// Emulate the odd truncation/rounding that the PowerPC does on the RHS operand before
// a single precision multiply. To be precise, it drops the low 28 bits of the mantissa,
@@ -724,8 +724,8 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg
}
}
static u32 GC_ALIGNED16(temp32);
static u64 GC_ALIGNED16(temp64);
alignas(16) static u32 temp32;
alignas(16) static u64 temp64;
// Since the following float conversion functions are used in non-arithmetic PPC float instructions,
// they must convert floats bitexact and never flush denormals to zero or turn SNaNs into QNaNs.
@@ -740,12 +740,12 @@ static u64 GC_ALIGNED16(temp64);
//#define MORE_ACCURATE_DOUBLETOSINGLE
#ifdef MORE_ACCURATE_DOUBLETOSINGLE
static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi64x(0, 0x7ff0000000000000);
static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi64x(0, 0x000fffffffffffff);
static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0, 0x8000000000000000);
static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi64x(0, 0x0010000000000000);
static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi64x(0, 0xc000000000000000);
static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi64x(0, 0x07ffffffe0000000);
alignas(16) static const __m128i double_exponent = _mm_set_epi64x(0, 0x7ff0000000000000);
alignas(16) static const __m128i double_fraction = _mm_set_epi64x(0, 0x000fffffffffffff);
alignas(16) static const __m128i double_sign_bit = _mm_set_epi64x(0, 0x8000000000000000);
alignas(16) static const __m128i double_explicit_top_bit = _mm_set_epi64x(0, 0x0010000000000000);
alignas(16) static const __m128i double_top_two_bits = _mm_set_epi64x(0, 0xc000000000000000);
alignas(16) static const __m128i double_bottom_bits = _mm_set_epi64x(0, 0x07ffffffe0000000);
// This is the same algorithm used in the interpreter (and actual hardware)
// The documentation states that the conversion of a double with an outside the
@@ -816,12 +816,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
#else // MORE_ACCURATE_DOUBLETOSINGLE
static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0xffffffffffffffff, 0x7fffffffffffffff);
static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffbfffff);
static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi64x(0xffffffffffffffff, 0xfff7ffffffffffff);
alignas(16) static const __m128i double_sign_bit = _mm_set_epi64x(0xffffffffffffffff, 0x7fffffffffffffff);
alignas(16) static const __m128i single_qnan_bit = _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffbfffff);
alignas(16) static const __m128i double_qnan_bit = _mm_set_epi64x(0xffffffffffffffff, 0xfff7ffffffffffff);
// Smallest positive double that results in a normalized single.
static const double GC_ALIGNED16(min_norm_single) = std::numeric_limits<float>::min();
alignas(16) static const double min_norm_single = std::numeric_limits<float>::min();
void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
{
@@ -895,9 +895,9 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
MOVDDUP(dst, R(dst));
}
static const u64 GC_ALIGNED16(psDoubleExp[2]) = {0x7FF0000000000000ULL, 0};
static const u64 GC_ALIGNED16(psDoubleFrac[2]) = {0x000FFFFFFFFFFFFFULL, 0};
static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0};
alignas(16) static const u64 psDoubleExp[2] = {0x7FF0000000000000ULL, 0};
alignas(16) static const u64 psDoubleFrac[2] = {0x000FFFFFFFFFFFFFULL, 0};
alignas(16) static const u64 psDoubleNoSign[2] = {0x7FFFFFFFFFFFFFFFULL, 0};
// TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer, storing
// the result of each floating point op and calculating it when needed. This is trickier than for integers

View File

@@ -29,7 +29,7 @@ namespace PowerPC
{
// STATE_TO_SAVE
PowerPCState GC_ALIGNED16(ppcState);
PowerPCState ppcState;
static volatile CPUState state = CPU_POWERDOWN;
Interpreter * const interpreter = Interpreter::getInstance();

View File

@@ -57,7 +57,7 @@ struct tlb_entry
};
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
struct GC_ALIGNED64(PowerPCState)
struct PowerPCState
{
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
@@ -108,7 +108,7 @@ struct GC_ALIGNED64(PowerPCState)
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
GC_ALIGNED16(u64 ps[32][2]);
alignas(16) u64 ps[32][2];
u32 sr[16]; // Segment registers.