From 0b7bed4a5297a2368a7f2e2f921a28c6e628d8fb Mon Sep 17 00:00:00 2001 From: "Jasper St. Pierre" Date: Sun, 10 Aug 2014 15:48:11 -0400 Subject: [PATCH] TextureDecoder: Simplify how the reference texture decoder works Instead of having three separate functions and checking the tlutfmt in a variety of places, just do it once in a helper method. This is already for the slow path either in our Generic decoder or in our Software renderer, so it doesn't matter that this is slower. x64 will continue using the separate functions for speed. --- .../VideoCommon/TextureDecoder_Common.cpp | 54 ++---- .../VideoCommon/TextureDecoder_Generic.cpp | 168 ++++-------------- 2 files changed, 52 insertions(+), 170 deletions(-) diff --git a/Source/Core/VideoCommon/TextureDecoder_Common.cpp b/Source/Core/VideoCommon/TextureDecoder_Common.cpp index cb71c3613f..1fd09fc1f9 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Common.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Common.cpp @@ -301,6 +301,21 @@ struct DXTBlock u8 lines[4]; }; +static inline u32 decodePalettedPixel(u16 pixel, TlutFormat tlutfmt) +{ + switch (tlutfmt) + { + case GX_TL_IA8: + return decodeIA8Swapped(pixel); + case GX_TL_RGB565: + return decode565RGBA(Common::swap16(pixel)); + case GX_TL_RGB5A3: + return decode5A3RGBA(Common::swap16(pixel)); + default: + return 0; + } +} + void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut_, TlutFormat tlutfmt) { /* General formula for computing texture offset @@ -332,18 +347,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth u8 val = (*(src + offset) >> rs) & 0xF; u16 *tlut = (u16*) tlut_; - switch (tlutfmt) - { - case GX_TL_IA8: - *((u32*)dst) = decodeIA8Swapped(tlut[val]); - break; - case GX_TL_RGB565: - *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); - break; - case GX_TL_RGB5A3: - *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); - break; - } + *((u32*)dst) = decodePalettedPixel(tlut[val], tlutfmt); } break; case GX_TF_I4: @@ -397,18 +401,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth u8 val = *(src + base + blkOff); u16 *tlut = (u16*) tlut_; - switch (tlutfmt) - { - case GX_TL_IA8: - *((u32*)dst) = decodeIA8Swapped(tlut[val]); - break; - case GX_TL_RGB565: - *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); - break; - case GX_TL_RGB5A3: - *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); - break; - } + *((u32*)dst) = decodePalettedPixel(tlut[val], tlutfmt); } break; case GX_TF_IA4: @@ -462,18 +455,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth u16 val = Common::swap16(*valAddr) & 0x3FFF; u16 *tlut = (u16*) tlut_; - switch (tlutfmt) - { - case GX_TL_IA8: - *((u32*)dst) = decodeIA8Swapped(tlut[val]); - break; - case GX_TL_RGB565: - *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val])); - break; - case GX_TL_RGB5A3: - *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val])); - break; - } + *((u32*)dst) = decodePalettedPixel(tlut[val], tlutfmt); } break; case GX_TF_RGB565: diff --git a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp index 41144b79b5..92a7687f4e 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp @@ -60,94 +60,49 @@ struct DXTBlock u8 lines[4]; }; -static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, const u8* tlut_) +static inline u32 decodePalettedPixel(u16 pixel, TlutFormat tlutfmt) +{ + switch (tlutfmt) + { + case GX_TL_IA8: + return decodeIA8Swapped(pixel); + case GX_TL_RGB565: + return decode565RGBA(Common::swap16(pixel)); + case GX_TL_RGB5A3: + return decode5A3RGBA(Common::swap16(pixel)); + default: + return 0; + } +} + +static inline void decodeC4(u32 *dst, const u8 *src, const u8* tlut_, TlutFormat tlutfmt) { const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u8 val = src[x]; - *dst++ = decode5A3RGBA(Common::swap16(tlut[val >> 4])); - *dst++ = decode5A3RGBA(Common::swap16(tlut[val & 0xF])); + *dst++ = decodePalettedPixel(tlut[val >> 4], tlutfmt); + *dst++ = decodePalettedPixel(tlut[val & 0xF], tlutfmt); } } -static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_) -{ - const u16* tlut = (u16*) tlut_; - for (int x = 0; x < 4; x++) - { - u8 val = src[x]; - *dst++ = decodeIA8Swapped(tlut[val >> 4]); - *dst++ = decodeIA8Swapped(tlut[val & 0xF]); - } -} - -static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_) -{ - const u16* tlut = (u16*) tlut_; - for (int x = 0; x < 4; x++) - { - u8 val = src[x]; - *dst++ = decode565RGBA(Common::swap16(tlut[val >> 4])); - *dst++ = decode565RGBA(Common::swap16(tlut[val & 0xF])); - } -} - -static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, const u8* tlut_) +static inline void decodeC8(u32 *dst, const u8 *src, const u8* tlut_, TlutFormat tlutfmt) { const u16* tlut = (u16*) tlut_; for (int x = 0; x < 8; x++) { u8 val = src[x]; - *dst++ = decode5A3RGBA(Common::swap16(tlut[val])); + *dst++ = decodePalettedPixel(tlut[val], tlutfmt); } } -static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_) -{ - const u16* tlut = (u16*) tlut_; - for (int x = 0; x < 8; x++) - { - *dst++ = decodeIA8Swapped(tlut[src[x]]); - } -} - -static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_) -{ - const u16* tlut = (u16*) tlut_; - for (int x = 0; x < 8; x++) - { - *dst++ = decode565RGBA(Common::swap16(tlut[src[x]])); - } -} - -static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, const u8* tlut_) +static inline void decodeC14X2(u32 *dst, const u16 *src, const u8* tlut_, TlutFormat tlutfmt) { const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u16 val = Common::swap16(src[x]); - *dst++ = decode5A3RGBA(Common::swap16(tlut[(val & 0x3FFF)])); - } -} - -static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, const u8* tlut_) -{ - const u16* tlut = (u16*) tlut_; - for (int x = 0; x < 4; x++) - { - u16 val = Common::swap16(src[x]); - *dst++ = decodeIA8Swapped(tlut[(val & 0x3FFF)]); - } -} - -static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, const u8* tlut_) -{ - const u16* tlut = (u16*) tlut_; - for (int x = 0; x < 4; x++) - { - u16 val = Common::swap16(src[x]); - *dst++ = decode565RGBA(Common::swap16(tlut[(val & 0x3FFF)])); + *dst++ = decodePalettedPixel(tlut[(val & 0x3FFF)], tlutfmt); } } @@ -255,28 +210,10 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he switch (texformat) { case GX_TF_C4: - if (tlutfmt == GX_TL_RGB5A3) - { - // Special decoding is required for TLUT format 5A3 - for (int y = 0; y < height; y += 8) - for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) - for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlut); - } - else if (tlutfmt == GX_TL_IA8) - { - for (int y = 0; y < height; y += 8) - for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) - for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut); - } - else if (tlutfmt == GX_TL_RGB565) - { - for (int y = 0; y < height; y += 8) - for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) - for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut); - } + for (int y = 0; y < height; y += 8) + for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) + for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) + decodeC4(dst + (y + iy) * width + x, src + 4 * xStep, tlut, tlutfmt); break; case GX_TF_I4: { @@ -317,29 +254,10 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he } break; case GX_TF_C8: - if (tlutfmt == GX_TL_RGB5A3) - { - // Special decoding is required for TLUT format 5A3 - for (int y = 0; y < height; y += 4) - for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) - for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut); - } - else if (tlutfmt == GX_TL_IA8) - { - for (int y = 0; y < height; y += 4) - for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) - for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut); - } - else if (tlutfmt == GX_TL_RGB565) - { - for (int y = 0; y < height; y += 4) - for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) - for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut); - - } + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodeC8((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut, tlutfmt); break; case GX_TF_IA4: { @@ -366,28 +284,10 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he } break; case GX_TF_C14X2: - if (tlutfmt == GX_TL_RGB5A3) - { - // Special decoding is required for TLUT format 5A3 - for (int y = 0; y < height; y += 4) - for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) - for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); - } - else if (tlutfmt == GX_TL_IA8) - { - for (int y = 0; y < height; y += 4) - for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) - for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); - } - else if (tlutfmt == GX_TL_RGB565) - { - for (int y = 0; y < height; y += 4) - for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) - for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); - } + for (int y = 0; y < height; y += 4) + for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) + for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) + decodeC14X2(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut, tlutfmt); break; case GX_TF_RGB565: {