dolphin/Externals/liblzma/lzma/fastpos.h

///////////////////////////////////////////////////////////////////////////////
//
/// \file       fastpos.h
/// \brief      Kind of two-bit version of bit scan reverse
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_FASTPOS_H
#define LZMA_FASTPOS_H

// LZMA encodes match distances by storing the highest two bits using
// a six-bit value [0, 63], and then the missing lower bits.
// Dictionary size is also stored using this encoding in the .xz
// file format header.
//
// fastpos.h provides a way to quickly find out the correct six-bit
// values. The following table gives some examples of this encoding:
//
//     dist   return
//       0       0
//       1       1
//       2       2
//       3       3
//       4       4
//       5       4
//       6       5
//       7       5
//       8       6
//      11       6
//      12       7
//     ...      ...
//      15       7
//      16       8
//      17       8
//     ...      ...
//      23       8
//      24       9
//      25       9
//     ...      ...
//
//
// Provided functions or macros
// ----------------------------
//
// get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
// assumes that dist >= FULL_DISTANCES, thus the result is at least
// FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
// get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
// should be tiny bit faster due to the assumption being made.
//
//
// Size vs. speed
// --------------
//
// With some CPUs that have fast BSR (bit scan reverse) instruction, the
// size optimized version is slightly faster than the bigger table based
// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
// would still have speed roughly comparable to the table version. Older
// x86 CPUs like the original Pentium have very slow BSR; on those systems
// the table version is a lot faster.
//
// On some CPUs, the table version is a lot faster when using position
// dependent code, but with position independent code the size optimized
// version is slightly faster. This occurs at least on 32-bit SPARC (no
// ASM optimizations).
//
// I'm making the table version the default, because that has good speed
// on all systems I have tried. The size optimized version is sometimes
// slightly faster, but sometimes it is a lot slower.

#ifdef HAVE_SMALL
#	define get_dist_slot(dist) \
		((dist) <= 4 ? (dist) : get_dist_slot_2(dist))

static inline uint32_t
get_dist_slot_2(uint32_t dist)
{
	const uint32_t i = bsr32(dist);
	return (i + i) + ((dist >> (i - 1)) & 1);
}


#else

#define FASTPOS_BITS 13

extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];


#define fastpos_shift(extra, n) \
	((extra) + (n) * (FASTPOS_BITS - 1))

#define fastpos_limit(extra, n) \
	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))

#define fastpos_result(dist, extra, n) \
	lzma_fastpos[(dist) >> fastpos_shift(extra, n)] \
			+ 2 * fastpos_shift(extra, n)


static inline uint32_t
get_dist_slot(uint32_t dist)
{
	// If it is small enough, we can pick the result directly from
	// the precalculated table.
	if (dist < fastpos_limit(0, 0))
		return lzma_fastpos[dist];

	if (dist < fastpos_limit(0, 1))
		return fastpos_result(dist, 0, 1);

	return fastpos_result(dist, 0, 2);
}


#ifdef FULL_DISTANCES_BITS
static inline uint32_t
get_dist_slot_2(uint32_t dist)
{
	assert(dist >= FULL_DISTANCES);

	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);

	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);

	return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
}
#endif

#endif

#endif
Externals: Add libLZMA. 2019-12-30 07:07:54 -07:00			`///////////////////////////////////////////////////////////////////////////////`
			`//`
			`/// \file fastpos.h`
			`/// \brief Kind of two-bit version of bit scan reverse`
			`///`
			`// Authors: Igor Pavlov`
			`// Lasse Collin`
			`//`
			`// This file has been put into the public domain.`
			`// You can do whatever you want with this file.`
			`//`
			`///////////////////////////////////////////////////////////////////////////////`

			`#ifndef LZMA_FASTPOS_H`
			`#define LZMA_FASTPOS_H`

			`// LZMA encodes match distances by storing the highest two bits using`
			`// a six-bit value [0, 63], and then the missing lower bits.`
			`// Dictionary size is also stored using this encoding in the .xz`
			`// file format header.`
			`//`
			`// fastpos.h provides a way to quickly find out the correct six-bit`
			`// values. The following table gives some examples of this encoding:`
			`//`
			`// dist return`
			`// 0 0`
			`// 1 1`
			`// 2 2`
			`// 3 3`
			`// 4 4`
			`// 5 4`
			`// 6 5`
			`// 7 5`
			`// 8 6`
			`// 11 6`
			`// 12 7`
			`// ... ...`
			`// 15 7`
			`// 16 8`
			`// 17 8`
			`// ... ...`
			`// 23 8`
			`// 24 9`
			`// 25 9`
			`// ... ...`
			`//`
			`//`
			`// Provided functions or macros`
			`// ----------------------------`
			`//`
			`// get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)`
			`// assumes that dist >= FULL_DISTANCES, thus the result is at least`
			`// FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of`
			`// get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)`
			`// should be tiny bit faster due to the assumption being made.`
			`//`
			`//`
			`// Size vs. speed`
			`// --------------`
			`//`
			`// With some CPUs that have fast BSR (bit scan reverse) instruction, the`
			`// size optimized version is slightly faster than the bigger table based`
			`// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III`
			`// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that`
			`// would still have speed roughly comparable to the table version. Older`
			`// x86 CPUs like the original Pentium have very slow BSR; on those systems`
			`// the table version is a lot faster.`
			`//`
			`// On some CPUs, the table version is a lot faster when using position`
			`// dependent code, but with position independent code the size optimized`
			`// version is slightly faster. This occurs at least on 32-bit SPARC (no`
			`// ASM optimizations).`
			`//`
			`// I'm making the table version the default, because that has good speed`
			`// on all systems I have tried. The size optimized version is sometimes`
			`// slightly faster, but sometimes it is a lot slower.`

			`#ifdef HAVE_SMALL`
			`# define get_dist_slot(dist) \`
			`((dist) <= 4 ? (dist) : get_dist_slot_2(dist))`

			`static inline uint32_t`
			`get_dist_slot_2(uint32_t dist)`
			`{`
			`const uint32_t i = bsr32(dist);`
			`return (i + i) + ((dist >> (i - 1)) & 1);`
			`}`


			`#else`

			`#define FASTPOS_BITS 13`

			`extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];`


			`#define fastpos_shift(extra, n) \`
			`((extra) + (n) * (FASTPOS_BITS - 1))`

			`#define fastpos_limit(extra, n) \`
			`(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))`

			`#define fastpos_result(dist, extra, n) \`
			`lzma_fastpos[(dist) >> fastpos_shift(extra, n)] \`
			`+ 2 * fastpos_shift(extra, n)`


			`static inline uint32_t`
			`get_dist_slot(uint32_t dist)`
			`{`
			`// If it is small enough, we can pick the result directly from`
			`// the precalculated table.`
			`if (dist < fastpos_limit(0, 0))`
			`return lzma_fastpos[dist];`

			`if (dist < fastpos_limit(0, 1))`
			`return fastpos_result(dist, 0, 1);`

			`return fastpos_result(dist, 0, 2);`
			`}`


			`#ifdef FULL_DISTANCES_BITS`
			`static inline uint32_t`
			`get_dist_slot_2(uint32_t dist)`
			`{`
			`assert(dist >= FULL_DISTANCES);`

			`if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))`
			`return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);`

			`if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))`
			`return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);`

			`return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);`
			`}`
			`#endif`

			`#endif`

			`#endif`