JIT: optimize for the common case of unquantized psq_l/st

In blocks that use only one GQR, optimistically assume that GQR is 0; if the
assumption fails, bail at the start of the block and recompile.

Many games use almost entirely unquantized stores (e.g. Rebel Strike, Sonic
Colors), so this will likely be a big performance improvement across the board
for games with heavy use of paired singles.
This commit is contained in:
Fiora
2015-01-04 04:20:59 -08:00
parent e32d63c43d
commit 8237004448
12 changed files with 275 additions and 17 deletions

View File

@ -21,6 +21,18 @@ static inline int CountSetBits(T v)
v = (v + (v >> 4)) & (T)~(T)0/255*15;
return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
}
// Returns the zero-based position of the lowest set bit of an 8-bit value,
// located with the _BitScanForward intrinsic.
// NOTE(review): the intrinsic's documentation says the output index is not
// meaningful for a zero mask, so val is presumably expected to be nonzero --
// confirm against callers.
static inline int LeastSignificantSetBit(u8 val)
{
    unsigned long lowest_bit;
    // Widen explicitly to the mask type the intrinsic takes.
    _BitScanForward(&lowest_bit, static_cast<unsigned long>(val));
    return static_cast<int>(lowest_bit);
}
// Returns the zero-based position of the lowest set bit of a 16-bit value,
// located with the _BitScanForward intrinsic.
// NOTE(review): the intrinsic's documentation says the output index is not
// meaningful for a zero mask, so val is presumably expected to be nonzero --
// confirm against callers.
static inline int LeastSignificantSetBit(u16 val)
{
    unsigned long lowest_bit;
    // Widen explicitly to the mask type the intrinsic takes.
    _BitScanForward(&lowest_bit, static_cast<unsigned long>(val));
    return static_cast<int>(lowest_bit);
}
static inline int LeastSignificantSetBit(u32 val)
{
unsigned long index;
@ -34,8 +46,12 @@ static inline int LeastSignificantSetBit(u64 val)
return (int)index;
}
#else
// CountSetBits overloads for compilers that provide the GCC-style popcount
// builtins: each returns the number of 1 bits in val.

// 8-bit input, widened to the builtin's unsigned int operand.
static inline int CountSetBits(u8 val)
{
    return __builtin_popcount(static_cast<unsigned int>(val));
}

// 16-bit input, widened to the builtin's unsigned int operand.
static inline int CountSetBits(u16 val)
{
    return __builtin_popcount(static_cast<unsigned int>(val));
}

// 32-bit input.
static inline int CountSetBits(u32 val)
{
    return __builtin_popcount(static_cast<unsigned int>(val));
}

// 64-bit input uses the long long flavour of the builtin.
static inline int CountSetBits(u64 val)
{
    return __builtin_popcountll(static_cast<unsigned long long>(val));
}
// LeastSignificantSetBit overloads for compilers that provide the GCC-style
// count-trailing-zeros builtins: each returns the zero-based position of the
// lowest set bit in val.
// NOTE(review): the builtins' documentation leaves the result undefined for a
// zero argument, so val is presumably expected to be nonzero -- confirm
// against callers.

// 8-bit input, widened to the builtin's unsigned int operand.
static inline int LeastSignificantSetBit(u8 val)
{
    return __builtin_ctz(static_cast<unsigned int>(val));
}

// 16-bit input, widened to the builtin's unsigned int operand.
static inline int LeastSignificantSetBit(u16 val)
{
    return __builtin_ctz(static_cast<unsigned int>(val));
}

// 32-bit input.
static inline int LeastSignificantSetBit(u32 val)
{
    return __builtin_ctz(static_cast<unsigned int>(val));
}

// 64-bit input uses the long long flavour of the builtin.
static inline int LeastSignificantSetBit(u64 val)
{
    return __builtin_ctzll(static_cast<unsigned long long>(val));
}
#endif
@ -163,5 +179,7 @@ public:
}
// Convenience aliases for BS::BitSet instantiated over each unsigned integer
// width; the numeric suffix matches the width of the underlying storage type.
using BitSet8 = BS::BitSet<u8>;
using BitSet16 = BS::BitSet<u16>;
using BitSet32 = BS::BitSet<u32>;
using BitSet64 = BS::BitSet<u64>;