mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-24 14:49:42 -06:00
166
Source/Core/Common/BitSet.h
Normal file
166
Source/Core/Common/BitSet.h
Normal file
@ -0,0 +1,166 @@
|
||||
// This file is under the public domain.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <initializer_list>
|
||||
#include <type_traits>
|
||||
#include "CommonTypes.h"
|
||||
|
||||
// Helper functions:
|
||||
|
||||
#ifdef _WIN32
|
||||
template <typename T>
|
||||
static inline int CountSetBits(T v)
|
||||
{
|
||||
// from https://graphics.stanford.edu/~seander/bithacks.html
|
||||
// GCC has this built in, but MSVC's intrinsic will only emit the actual
|
||||
// POPCNT instruction, which we're not depending on
|
||||
v = v - ((v >> 1) & (T)~(T)0/3);
|
||||
v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);
|
||||
v = (v + (v >> 4)) & (T)~(T)0/255*15;
|
||||
return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
|
||||
}
|
||||
static inline int LeastSignificantSetBit(u32 val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, val);
|
||||
return (int)index;
|
||||
}
|
||||
static inline int LeastSignificantSetBit(u64 val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward64(&index, val);
|
||||
return (int)index;
|
||||
}
|
||||
#else
|
||||
static inline int CountSetBits(u32 val) { return __builtin_popcount(val); }
|
||||
static inline int CountSetBits(u64 val) { return __builtin_popcountll(val); }
|
||||
static inline int LeastSignificantSetBit(u32 val) { return __builtin_ctz(val); }
|
||||
static inline int LeastSignificantSetBit(u64 val) { return __builtin_ctzll(val); }
|
||||
#endif
|
||||
|
||||
// namespace avoids conflict with OS X Carbon; don't use BitSet<T> directly
|
||||
namespace BS
|
||||
{
|
||||
|
||||
// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
|
||||
// using the set bits of an integer to represent a set of integers. Like that
|
||||
// class, it acts like an array of bools:
|
||||
// BitSet32 bs;
|
||||
// bs[1] = true;
|
||||
// but also like the underlying integer ([0] = least significant bit):
|
||||
// BitSet32 bs2 = ...;
|
||||
// bs = (bs ^ bs2) & BitSet32(0xffff);
|
||||
// The following additional functionality is provided:
|
||||
// - Construction using an initializer list.
|
||||
// BitSet bs { 1, 2, 4, 8 };
|
||||
// - Efficiently iterating through the set bits:
|
||||
// for (int i : bs)
|
||||
// [i is the *index* of a set bit]
|
||||
// (This uses the appropriate CPU instruction to find the next set bit in one
|
||||
// operation.)
|
||||
// - Counting set bits using .Count() - see comment on that method.
|
||||
|
||||
// TODO: use constexpr when MSVC gets out of the Dark Ages
|
||||
|
||||
template <typename IntTy>
|
||||
class BitSet
|
||||
{
|
||||
static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");
|
||||
public:
|
||||
// A reference to a particular bit, returned from operator[].
|
||||
class Ref
|
||||
{
|
||||
public:
|
||||
Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
|
||||
Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
|
||||
operator bool() const { return (m_bs->m_val & m_mask) != 0; }
|
||||
bool operator=(bool set)
|
||||
{
|
||||
m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
|
||||
return set;
|
||||
}
|
||||
private:
|
||||
BitSet* m_bs;
|
||||
IntTy m_mask;
|
||||
};
|
||||
|
||||
// A STL-like iterator is required to be able to use range-based for loops.
|
||||
class Iterator
|
||||
{
|
||||
public:
|
||||
Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
|
||||
Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
|
||||
Iterator& operator=(Iterator other) { new (this) Iterator(other); return *this; }
|
||||
int operator*() { return m_bit; }
|
||||
Iterator& operator++()
|
||||
{
|
||||
if (m_val == 0)
|
||||
{
|
||||
m_bit = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
int bit = LeastSignificantSetBit(m_val);
|
||||
m_val &= ~(1 << bit);
|
||||
m_bit = bit;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
Iterator operator++(int _)
|
||||
{
|
||||
Iterator other(*this);
|
||||
++*this;
|
||||
return other;
|
||||
}
|
||||
bool operator==(Iterator other) const { return m_bit == other.m_bit; }
|
||||
bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
|
||||
private:
|
||||
IntTy m_val;
|
||||
int m_bit;
|
||||
};
|
||||
|
||||
BitSet() : m_val(0) {}
|
||||
explicit BitSet(IntTy val) : m_val(val) {}
|
||||
BitSet(std::initializer_list<int> init)
|
||||
{
|
||||
m_val = 0;
|
||||
for (int bit : init)
|
||||
m_val |= (IntTy)1 << bit;
|
||||
}
|
||||
|
||||
static BitSet AllTrue(size_t count)
|
||||
{
|
||||
return BitSet(count == sizeof(IntTy)*8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
|
||||
}
|
||||
|
||||
Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
|
||||
const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
|
||||
bool operator==(BitSet other) const { return m_val == other.m_val; }
|
||||
bool operator!=(BitSet other) const { return m_val != other.m_val; }
|
||||
BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
|
||||
BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
|
||||
BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
|
||||
BitSet operator~() const { return BitSet(~m_val); }
|
||||
BitSet& operator|=(BitSet other) { return *this = *this | other; }
|
||||
BitSet& operator&=(BitSet other) { return *this = *this & other; }
|
||||
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
|
||||
operator u32() = delete;
|
||||
operator bool() { return m_val != 0; }
|
||||
|
||||
// Warning: Even though on modern CPUs this is a single fast instruction,
|
||||
// Dolphin's official builds do not currently assume POPCNT support on x86,
|
||||
// so slower explicit bit twiddling is generated. Still should generally
|
||||
// be faster than a loop.
|
||||
unsigned int Count() const { return CountSetBits(m_val); }
|
||||
|
||||
Iterator begin() const { Iterator it(m_val, 0); return ++it; }
|
||||
Iterator end() const { return Iterator(m_val, -1); }
|
||||
|
||||
IntTy m_val;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
typedef BS::BitSet<u32> BitSet32;
|
||||
typedef BS::BitSet<u64> BitSet64;
|
@ -39,6 +39,7 @@
|
||||
<ClInclude Include="Atomic_GCC.h" />
|
||||
<ClInclude Include="Atomic_Win32.h" />
|
||||
<ClInclude Include="BitField.h" />
|
||||
<ClInclude Include="BitSet.h" />
|
||||
<ClInclude Include="BreakPoints.h" />
|
||||
<ClInclude Include="CDUtils.h" />
|
||||
<ClInclude Include="ChunkFile.h" />
|
||||
@ -137,4 +138,4 @@
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
@ -13,6 +13,7 @@
|
||||
<ClInclude Include="Atomic_GCC.h" />
|
||||
<ClInclude Include="Atomic_Win32.h" />
|
||||
<ClInclude Include="BitField.h" />
|
||||
<ClInclude Include="BitSet.h" />
|
||||
<ClInclude Include="BreakPoints.h" />
|
||||
<ClInclude Include="CDUtils.h" />
|
||||
<ClInclude Include="ChunkFile.h" />
|
||||
@ -118,4 +119,4 @@
|
||||
<ItemGroup>
|
||||
<Text Include="CMakeLists.txt" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
@ -10,31 +10,23 @@ using namespace Gen;
|
||||
|
||||
// Shared code between Win64 and Unix64
|
||||
|
||||
void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp)
|
||||
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp)
|
||||
{
|
||||
size_t shadow = 0;
|
||||
#if defined(_WIN32)
|
||||
shadow = 0x20;
|
||||
#endif
|
||||
|
||||
int count = 0;
|
||||
for (int r = 0; r < 16; r++)
|
||||
{
|
||||
if (mask & (1 << r))
|
||||
count++;
|
||||
}
|
||||
int count = (mask & ABI_ALL_GPRS).Count();
|
||||
rsp_alignment -= count * 8;
|
||||
size_t subtraction = 0;
|
||||
if (mask & 0xffff0000)
|
||||
int fpr_count = (mask & ABI_ALL_FPRS).Count();
|
||||
if (fpr_count)
|
||||
{
|
||||
// If we have any XMMs to save, we must align the stack here.
|
||||
subtraction = rsp_alignment & 0xf;
|
||||
}
|
||||
for (int x = 0; x < 16; x++)
|
||||
{
|
||||
if (mask & (1 << (16 + x)))
|
||||
subtraction += 16;
|
||||
}
|
||||
subtraction += 16 * fpr_count;
|
||||
size_t xmm_base_subtraction = subtraction;
|
||||
subtraction += needed_frame_size;
|
||||
subtraction += shadow;
|
||||
@ -47,44 +39,35 @@ void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t nee
|
||||
*xmm_offsetp = subtraction - xmm_base_subtraction;
|
||||
}
|
||||
|
||||
size_t XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
{
|
||||
size_t shadow, subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
|
||||
|
||||
for (int r = 0; r < 16; r++)
|
||||
{
|
||||
if (mask & (1 << r))
|
||||
PUSH((X64Reg) r);
|
||||
}
|
||||
for (int r : mask & ABI_ALL_GPRS)
|
||||
PUSH((X64Reg) r);
|
||||
|
||||
if (subtraction)
|
||||
SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
|
||||
|
||||
for (int x = 0; x < 16; x++)
|
||||
for (int x : mask & ABI_ALL_FPRS)
|
||||
{
|
||||
if (mask & (1 << (16 + x)))
|
||||
{
|
||||
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) x);
|
||||
xmm_offset += 16;
|
||||
}
|
||||
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) (x - 16));
|
||||
xmm_offset += 16;
|
||||
}
|
||||
|
||||
return shadow;
|
||||
}
|
||||
|
||||
void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size)
|
||||
{
|
||||
size_t shadow, subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
|
||||
|
||||
for (int x = 0; x < 16; x++)
|
||||
for (int x : mask & ABI_ALL_FPRS)
|
||||
{
|
||||
if (mask & (1 << (16 + x)))
|
||||
{
|
||||
MOVAPD((X64Reg) x, MDisp(RSP, (int)xmm_offset));
|
||||
xmm_offset += 16;
|
||||
}
|
||||
MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
|
||||
xmm_offset += 16;
|
||||
}
|
||||
|
||||
if (subtraction)
|
||||
@ -92,10 +75,8 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, si
|
||||
|
||||
for (int r = 15; r >= 0; r--)
|
||||
{
|
||||
if (mask & (1 << r))
|
||||
{
|
||||
if (mask[r])
|
||||
POP((X64Reg) r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
||||
// x64 ABI:s, and helpers to help follow them when JIT-ing code.
|
||||
@ -23,6 +24,9 @@
|
||||
// Callee-save: RBX RBP R12 R13 R14 R15
|
||||
// Parameters: RDI RSI RDX RCX R8 R9
|
||||
|
||||
#define ABI_ALL_FPRS BitSet32(0xffff0000)
|
||||
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
|
||||
|
||||
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
|
||||
|
||||
#define ABI_PARAM1 RCX
|
||||
@ -31,11 +35,9 @@
|
||||
#define ABI_PARAM4 R9
|
||||
|
||||
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
|
||||
#define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \
|
||||
(1 << R9) | (1 << R10) | (1 << R11) | \
|
||||
(1 << (XMM0+16)) | (1 << (XMM1+16)) | (1 << (XMM2+16)) | (1 << (XMM3+16)) | \
|
||||
(1 << (XMM4+16)) | (1 << (XMM5+16)))
|
||||
|
||||
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
|
||||
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
|
||||
#else //64-bit Unix / OS X
|
||||
|
||||
#define ABI_PARAM1 RDI
|
||||
@ -47,13 +49,12 @@
|
||||
|
||||
// FIXME: avoid pushing all 16 XMM registers when possible? most functions we call probably
|
||||
// don't actually clobber them.
|
||||
#define ABI_ALL_CALLER_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << RDI) | \
|
||||
(1 << RSI) | (1 << R8) | (1 << R9) | (1 << R10) | (1 << R11) | \
|
||||
0xffff0000 /* xmm0..15 */)
|
||||
|
||||
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
|
||||
ABI_ALL_FPRS)
|
||||
#endif // WIN32
|
||||
|
||||
#define ABI_ALL_CALLEE_SAVED ((u32) ~ABI_ALL_CALLER_SAVED)
|
||||
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
|
||||
|
||||
#define ABI_RETURN RAX
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CodeBlock.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
@ -302,7 +303,7 @@ private:
|
||||
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
|
||||
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
|
||||
|
||||
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
|
||||
void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
|
||||
|
||||
protected:
|
||||
inline void Write8(u8 value) {*code++ = value;}
|
||||
@ -883,8 +884,8 @@ public:
|
||||
// Saves/restores the registers and adjusts the stack to be aligned as
|
||||
// required by the ABI, where the previous alignment was as specified.
|
||||
// Push returns the size of the shadow space, i.e. the offset of the frame.
|
||||
size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
|
||||
|
||||
inline int ABI_GetNumXMMRegs() { return 16; }
|
||||
|
||||
|
Reference in New Issue
Block a user