mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-23 06:09:50 -06:00
Video: Clearly separate Texture and EFB Copy formats
Improve bookkeeping around formats and, hopefully, make the code less confusing:
- Rename TlutFormat -> TLUTFormat to follow conventions.
- Use enum classes to prevent using a Texture format where an EFB Copy format is expected, or vice versa.
- Use common EFBCopyFormat names regardless of depth and YUV configurations.
This commit is contained in:
@ -212,12 +212,13 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch)
|
||||
// free to make the assumption that addresses are multiples of 16 in the aligned case.
|
||||
// TODO: complete SSE2 optimization of less often used texture formats.
|
||||
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
|
||||
static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height, int texformat,
|
||||
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
switch (tlutfmt)
|
||||
{
|
||||
case GX_TL_RGB5A3:
|
||||
case TLUTFormat::RGB5A3:
|
||||
{
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
|
||||
@ -226,7 +227,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_TL_IA8:
|
||||
case TLUTFormat::IA8:
|
||||
{
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
|
||||
@ -235,7 +236,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_TL_RGB565:
|
||||
case TLUTFormat::RGB565:
|
||||
{
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
|
||||
@ -251,8 +252,8 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
|
||||
|
||||
FUNCTION_TARGET_SSSE3
|
||||
static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
TextureFormat texformat, const u8* tlut,
|
||||
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
{
|
||||
const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
|
||||
const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
|
||||
@ -298,8 +299,9 @@ static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, i
|
||||
}
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height, int texformat,
|
||||
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
|
||||
const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
|
||||
@ -390,8 +392,8 @@ static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int hei
|
||||
|
||||
FUNCTION_TARGET_SSSE3
|
||||
static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
TextureFormat texformat, const u8* tlut,
|
||||
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// xsacha optimized with SSSE3 intrinsics
|
||||
// Produces a ~10% speed improvement over SSE2 implementation
|
||||
@ -419,8 +421,9 @@ static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, i
|
||||
}
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height, int texformat,
|
||||
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// JSD optimized with SSE2 intrinsics.
|
||||
// Produces an ~86% speed improvement over reference C implementation.
|
||||
@ -518,12 +521,13 @@ static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int hei
|
||||
}
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height, int texformat,
|
||||
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
switch (tlutfmt)
|
||||
{
|
||||
case GX_TL_RGB5A3:
|
||||
case TLUTFormat::RGB5A3:
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
@ -532,7 +536,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_TL_IA8:
|
||||
case TLUTFormat::IA8:
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
@ -541,7 +545,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_TL_RGB565:
|
||||
case TLUTFormat::RGB565:
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
@ -555,8 +559,9 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
|
||||
}
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height, int texformat,
|
||||
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
{
|
||||
@ -572,8 +577,8 @@ static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int he
|
||||
|
||||
FUNCTION_TARGET_SSSE3
|
||||
static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
TextureFormat texformat, const u8* tlut,
|
||||
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// xsacha optimized with SSSE3 intrinsics.
|
||||
// Produces an ~50% speed improvement over SSE2 implementation.
|
||||
@ -595,8 +600,9 @@ static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width,
|
||||
}
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height, int texformat,
|
||||
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// JSD optimized with SSE2 intrinsics.
|
||||
// Produces an ~80% speed improvement over reference C implementation.
|
||||
@ -656,12 +662,12 @@ static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int he
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
switch (tlutfmt)
|
||||
{
|
||||
case GX_TL_RGB5A3:
|
||||
case TLUTFormat::RGB5A3:
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
@ -670,7 +676,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_TL_IA8:
|
||||
case TLUTFormat::IA8:
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
@ -679,7 +685,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_TL_RGB565:
|
||||
case TLUTFormat::RGB565:
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
@ -694,8 +700,8 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
TextureFormat texformat, const u8* tlut,
|
||||
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// JSD optimized with SSE2 intrinsics.
|
||||
// Produces an ~78% speed improvement over reference C implementation.
|
||||
@ -766,8 +772,8 @@ static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int
|
||||
|
||||
FUNCTION_TARGET_SSSE3
|
||||
static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
TextureFormat texformat, const u8* tlut,
|
||||
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
{
|
||||
const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
|
||||
const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
|
||||
@ -872,8 +878,8 @@ static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int widt
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
TextureFormat texformat, const u8* tlut,
|
||||
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
{
|
||||
const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
|
||||
const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
|
||||
@ -993,8 +999,8 @@ static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int
|
||||
|
||||
FUNCTION_TARGET_SSSE3
|
||||
static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
TextureFormat texformat, const u8* tlut,
|
||||
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// xsacha optimized with SSSE3 intrinsics
|
||||
// Produces a ~30% speed improvement over SSE2 implementation
|
||||
@ -1027,7 +1033,7 @@ static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// JSD optimized with SSE2 intrinsics
|
||||
@ -1148,7 +1154,7 @@ static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int
|
||||
}
|
||||
|
||||
static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int height,
|
||||
int texformat, const u8* tlut, TlutFormat tlutfmt,
|
||||
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
|
||||
int Wsteps4, int Wsteps8)
|
||||
{
|
||||
// The metroid games use this format almost exclusively.
|
||||
@ -1403,19 +1409,19 @@ static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int h
|
||||
}
|
||||
}
|
||||
|
||||
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat,
|
||||
const u8* tlut, TlutFormat tlutfmt)
|
||||
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
|
||||
const u8* tlut, TLUTFormat tlutfmt)
|
||||
{
|
||||
int Wsteps4 = (width + 3) / 4;
|
||||
int Wsteps8 = (width + 7) / 8;
|
||||
|
||||
switch (texformat)
|
||||
{
|
||||
case GX_TF_C4:
|
||||
case TextureFormat::C4:
|
||||
TexDecoder_DecodeImpl_C4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_I4:
|
||||
case TextureFormat::I4:
|
||||
if (cpu_info.bSSSE3)
|
||||
TexDecoder_DecodeImpl_I4_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
|
||||
Wsteps8);
|
||||
@ -1423,7 +1429,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
|
||||
TexDecoder_DecodeImpl_I4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_I8:
|
||||
case TextureFormat::I8:
|
||||
if (cpu_info.bSSSE3)
|
||||
TexDecoder_DecodeImpl_I8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
|
||||
Wsteps8);
|
||||
@ -1431,15 +1437,15 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
|
||||
TexDecoder_DecodeImpl_I8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_C8:
|
||||
case TextureFormat::C8:
|
||||
TexDecoder_DecodeImpl_C8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_IA4:
|
||||
case TextureFormat::IA4:
|
||||
TexDecoder_DecodeImpl_IA4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_IA8:
|
||||
case TextureFormat::IA8:
|
||||
if (cpu_info.bSSSE3)
|
||||
TexDecoder_DecodeImpl_IA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
|
||||
Wsteps8);
|
||||
@ -1448,17 +1454,17 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
|
||||
Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_C14X2:
|
||||
case TextureFormat::C14X2:
|
||||
TexDecoder_DecodeImpl_C14X2(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
|
||||
Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_RGB565:
|
||||
case TextureFormat::RGB565:
|
||||
TexDecoder_DecodeImpl_RGB565(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
|
||||
Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_RGB5A3:
|
||||
case TextureFormat::RGB5A3:
|
||||
if (cpu_info.bSSSE3)
|
||||
TexDecoder_DecodeImpl_RGB5A3_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
|
||||
Wsteps8);
|
||||
@ -1467,7 +1473,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
|
||||
Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_RGBA8:
|
||||
case TextureFormat::RGBA8:
|
||||
if (cpu_info.bSSSE3)
|
||||
TexDecoder_DecodeImpl_RGBA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
|
||||
Wsteps8);
|
||||
@ -1476,12 +1482,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
|
||||
Wsteps8);
|
||||
break;
|
||||
|
||||
case GX_TF_CMPR:
|
||||
case TextureFormat::CMPR:
|
||||
TexDecoder_DecodeImpl_CMPR(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
|
||||
break;
|
||||
|
||||
default:
|
||||
PanicAlert("Unhandled texture format %d", texformat);
|
||||
PanicAlert("Invalid Texture Format (0x%X)! (_TexDecoder_DecodeImpl)",
|
||||
static_cast<int>(texformat));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user