dolphin/Source/Core/VideoCommon/TextureDecoder_Common.cpp
Scott Mansell b9be3245e1 Move common EFB copy code into VideoCommon
Added a few duplicated depth copy texture formats to the enum
in TextureDecoder.h. These texture formats were already implemented
in TextureCacheBase and the ogl/dx11 texture cache implementations.
2015-09-06 21:16:51 +12:00

597 lines
14 KiB
C++

// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include <cmath>
#include "Common/Common.h"
#include "VideoCommon/LookUpTables.h"
#include "VideoCommon/sfont.inc"
#include "VideoCommon/TextureDecoder.h"
// Overlay settings, written by TexDecoder_SetTexFmtOverlayOptions().
// When set, TexDecoder_Decode() draws the texture format name over every decoded texture.
static bool TexFmt_Overlay_Enable = false;
// When set, the overlay label is centered in the texture; otherwise it starts at offset (0, 0).
static bool TexFmt_Overlay_Center = false;
// TRAM
// STATE_TO_SAVE
// Byte buffer of TMEM_SIZE entries. GC_ALIGNED16 is defined elsewhere; presumably it
// requests 16-byte alignment for the array - confirm against its definition.
GC_ALIGNED16(u8 texMem[TMEM_SIZE]);
// Returns the storage size of a single texel, measured in nibbles (4-bit units),
// for the given texture format. Only the low six bits of `format` are examined.
// Unknown formats raise a PanicAlert and are treated as one nibble per texel.
int TexDecoder_GetTexelSizeInNibbles(int format)
{
	switch (format & 0x3f)
	{
	// 4 bits per texel
	case GX_TF_I4:
	case GX_TF_C4:
	case GX_TF_CMPR:
	case GX_CTF_R4:
	case GX_CTF_Z4:
		return 1;

	// 8 bits per texel
	case GX_TF_I8:
	case GX_TF_IA4:
	case GX_TF_C8:
	case GX_CTF_RA4:
	case GX_CTF_A8:
	case GX_CTF_R8:
	case GX_CTF_G8:
	case GX_CTF_B8:
	case GX_TF_Z8:
	case GX_CTF_Z8H:
	case GX_CTF_Z8M:
	case GX_CTF_Z8L:
		return 2;

	// 16 bits per texel
	case GX_TF_IA8:
	case GX_TF_RGB565:
	case GX_TF_RGB5A3:
	case GX_TF_C14X2:
	case GX_CTF_RA8:
	case GX_CTF_RG8:
	case GX_CTF_GB8:
	case GX_TF_Z16:
	case GX_CTF_Z16R:
	case GX_CTF_Z16L:
		return 4;

	// 32 bits per texel
	case GX_TF_RGBA8:
	case GX_TF_Z24X8:
		return 8;

	default:
		PanicAlert("Unsupported Texture Format (%08x)! (GetTexelSizeInNibbles)", format);
		return 1;
	}
}
// Returns width * height texels converted to bytes, using the per-texel size
// (in nibbles) of the given format. Two nibbles make one byte; the division truncates.
int TexDecoder_GetTextureSizeInBytes(int width, int height, int format)
{
	const int nibbles_per_texel = TexDecoder_GetTexelSizeInNibbles(format);
	const int total_nibbles = width * height * nibbles_per_texel;
	return total_nibbles / 2;
}
// Returns the width, in texels, of one storage block of the given texture format.
// Unlike TexDecoder_GetTexelSizeInNibbles, `format` is matched as-is (no masking).
// Unknown formats raise a PanicAlert and are treated as 8 texels wide.
int TexDecoder_GetBlockWidthInTexels(u32 format)
{
	switch (format)
	{
	// Blocks that are 8 texels wide
	case GX_TF_I4:
	case GX_TF_I8:
	case GX_TF_IA4:
	case GX_TF_C4:
	case GX_TF_C8:
	case GX_TF_CMPR:
	case GX_CTF_R4:
	case GX_CTF_RA4:
	case GX_CTF_A8:
	case GX_CTF_R8:
	case GX_CTF_G8:
	case GX_CTF_B8:
	case GX_TF_Z8:
	case GX_CTF_Z4:
	case GX_CTF_Z8H:
	case GX_CTF_Z8M:
	case GX_CTF_Z8L:
		return 8;

	// Blocks that are 4 texels wide
	case GX_TF_IA8:
	case GX_TF_RGB565:
	case GX_TF_RGB5A3:
	case GX_TF_RGBA8:
	case GX_TF_C14X2:
	case GX_CTF_RA8:
	case GX_CTF_RG8:
	case GX_CTF_GB8:
	case GX_TF_Z16:
	case GX_TF_Z24X8:
	case GX_CTF_Z16R:
	case GX_CTF_Z16L:
		return 4;

	default:
		PanicAlert("Unsupported Texture Format (%08x)! (GetBlockWidthInTexels)", format);
		return 8;
	}
}
// Returns the height, in texels, of one storage block of the given texture format.
// `format` is matched as-is (no masking). Unknown formats raise a PanicAlert
// and are treated as 4 texels high.
int TexDecoder_GetBlockHeightInTexels(u32 format)
{
	switch (format)
	{
	// Blocks that are 8 texels high
	case GX_TF_I4:
	case GX_TF_C4:
	case GX_TF_CMPR:
	case GX_CTF_R4:
	case GX_CTF_Z4:
		return 8;

	// Blocks that are 4 texels high
	case GX_TF_I8:
	case GX_TF_IA4:
	case GX_TF_IA8:
	case GX_TF_RGB565:
	case GX_TF_RGB5A3:
	case GX_TF_RGBA8:
	case GX_TF_C8:
	case GX_TF_C14X2:
	case GX_CTF_RA4:
	case GX_CTF_RA8:
	case GX_CTF_A8:
	case GX_CTF_R8:
	case GX_CTF_G8:
	case GX_CTF_B8:
	case GX_CTF_RG8:
	case GX_CTF_GB8:
	case GX_TF_Z8:
	case GX_TF_Z16:
	case GX_TF_Z24X8:
	case GX_CTF_Z8H:
	case GX_CTF_Z8M:
	case GX_CTF_Z8L:
	case GX_CTF_Z16R:
	case GX_CTF_Z16L:
		return 4;

	default:
		PanicAlert("Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format);
		return 4;
	}
}
// Returns the palette size in bytes for a paletted texture format
// (two bytes per palette entry), or 0 for every other format.
int TexDecoder_GetPaletteSize(int format)
{
	int entries = 0;

	if (format == GX_TF_C4)
		entries = 16;
	else if (format == GX_TF_C8)
		entries = 256;
	else if (format == GX_TF_C14X2)
		entries = 16384;

	return entries * 2;
}
// Stores the format-overlay settings consulted by TexDecoder_Decode():
//   enable - draw the texture format name on decoded textures
//   center - center the label instead of starting it at offset (0, 0)
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center)
{
	TexFmt_Overlay_Center = center;
	TexFmt_Overlay_Enable = enable;
}
// Display names for texture formats, indexed by format value (64 entries, 0x00-0x3F).
// Values without a known name are shown as their hexadecimal index.
static const char* texfmt[] = {
	// 0x00-0x0F: pixel
	"I4",   "I8",   "IA4",   "IA8",  "RGB565", "RGB5A3", "RGBA8",  "0x07",
	"C4",   "C8",   "C14X2", "0x0B", "0x0C",   "0x0D",   "CMPR",   "0x0F",
	// 0x10-0x1F: Z-buffer
	"0x10", "Z8",   "0x12",  "Z16",  "0x14",   "0x15",   "Z24X8",  "0x17",
	"0x18", "0x19", "0x1A",  "0x1B", "0x1C",   "0x1D",   "0x1E",   "0x1F",
	// 0x20-0x2F: pixel + copy
	"CR4",  "0x21", "CRA4",  "CRA8", "0x24",   "0x25",   "CYUVA8", "CA8",
	"CR8",  "CG8",  "CB8",   "CRG8", "CGB8",   "0x2D",   "0x2E",   "0x2F",
	// 0x30-0x3F: Z + copy
	"CZ4",  "0x31", "0x32",  "0x33", "0x34",   "0x35",   "0x36",   "0x37",
	"0x38", "CZ8M", "CZ8L",  "0x3B", "CZ16L",  "0x3D",   "0x3E",   "0x3F",
};
// Stamps the name of `texformat` onto an already decoded texture.
//
//   dst       - decoded image, written as one 32-bit value per pixel, `width` pixels per row
//   width     - image width in pixels
//   height    - image height in pixels
//   texformat - texture format; its low six bits select the label from texfmt[]
//
// Glyph pixels are written as 0xFFFFFFFF (set) or 0xFF000000 (clear). The label is
// centered when TexFmt_Overlay_Center is set, otherwise it starts at offset (0, 0).
// Anything that would fall outside the width x height image is clipped.
static void TexDecoder_DrawOverlay(u8 *dst, int width, int height, int texformat)
{
	// Nominal label area used to compute the centered position.
	int w = std::min(width, 40);
	int h = std::min(height, 10);
	int xoff = (width - w) >> 1;
	int yoff = (height - h) >> 1;
	if (!TexFmt_Overlay_Center)
	{
		xoff = 0;
		yoff = 0;
	}

	// texfmt[] has one name for each of the 64 format values, so mask with 0x3f
	// (the same mask TexDecoder_GetTexelSizeInNibbles uses). Masking with 15 would
	// label every Z and copy format with the name of an unrelated pixel format.
	const char* fmt = texfmt[texformat & 0x3f];
	u32* pixels = (u32*)dst;

	while (*fmt)
	{
		// Everything further right would be clipped anyway.
		if (xoff >= width)
			break;

		int nchar = sfont_map[(int)*fmt];
		const unsigned char *ptr = sfont_raw[nchar]; // each char is up to 9x10

		// The first glyph row marks the end of narrower glyphs with 0x78.
		int xcnt = 0;
		while (xcnt < 9 && ptr[xcnt] != 0x78)
			xcnt++;

		for (int y = 0; y < 10; y++)
		{
			const int dst_y = y + yoff;
			// Skip rows below the image (textures shorter than the 10-row glyph).
			if (dst_y < height)
			{
				for (int x = 0; x < xcnt; x++)
				{
					const int dst_x = x + xoff;
					// Stop at the right edge instead of wrapping into the next row
					// or writing past the end of the buffer.
					if (dst_x >= width)
						break;
					pixels[dst_y * width + dst_x] = ptr[x] ? 0xFFFFFFFF : 0xFF000000;
				}
			}
			ptr += 9;
		}
		xoff += xcnt;
		fmt++;
	}
}
// Decodes a whole texture by forwarding to _TexDecoder_DecodeImpl, then, if the
// format overlay is enabled, draws the format name on top of the decoded image.
void TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt)
{
	u32* const pixels = (u32*)dst;
	_TexDecoder_DecodeImpl(pixels, src, width, height, texformat, tlut, tlutfmt);

	if (!TexFmt_Overlay_Enable)
		return;

	TexDecoder_DrawOverlay(dst, width, height, texformat);
}
// Expands a 16-bit intensity/alpha value to 32 bits: the high byte of `val` is the
// intensity, replicated into bits 0-23; the low byte is the alpha, placed in bits 24-31.
static inline u32 DecodePixel_IA8(u16 val)
{
	const int alpha = val & 0xFF;
	const int intensity = val >> 8;
	const int rgb = intensity | (intensity << 8) | (intensity << 16);
	return rgb | (alpha << 24);
}
// Expands a 16-bit RGB565 value (red in bits 11-15, green in bits 5-10, blue in
// bits 0-4) to 32 bits with red in the lowest byte and a fully opaque alpha on top.
static inline u32 DecodePixel_RGB565(u16 val)
{
	const int red = Convert5To8((val >> 11) & 0x1f);
	const int green = Convert6To8((val >> 5) & 0x3f);
	const int blue = Convert5To8(val & 0x1f);
	const int alpha = 0xFF;
	return red | (green << 8) | (blue << 16) | (alpha << 24);
}
// Expands a 16-bit RGB5A3 value to 32 bits (red in the lowest byte, alpha in the highest).
// Bit 15 selects the layout:
//   set   - 5 bits each of red/green/blue, alpha forced to 0xFF
//   clear - 3 bits of alpha followed by 4 bits each of red/green/blue
static inline u32 DecodePixel_RGB5A3(u16 val)
{
	if ((val & 0x8000) == 0)
	{
		const int alpha = Convert3To8((val >> 12) & 0x7);
		const int red = Convert4To8((val >> 8) & 0xf);
		const int green = Convert4To8((val >> 4) & 0xf);
		const int blue = Convert4To8(val & 0xf);
		return red | (green << 8) | (blue << 16) | (alpha << 24);
	}

	const int red = Convert5To8((val >> 10) & 0x1f);
	const int green = Convert5To8((val >> 5) & 0x1f);
	const int blue = Convert5To8(val & 0x1f);
	const int alpha = 0xFF;
	return red | (green << 8) | (blue << 16) | (alpha << 24);
}
// Converts one palette entry to a 32-bit pixel according to the palette format.
// IA8 entries are decoded as read; RGB565 and RGB5A3 entries are byte-swapped first.
// Any other palette format yields 0.
static inline u32 DecodePixel_Paletted(u16 pixel, TlutFormat tlutfmt)
{
	if (tlutfmt == GX_TL_IA8)
		return DecodePixel_IA8(pixel);

	if (tlutfmt == GX_TL_RGB565)
		return DecodePixel_RGB565(Common::swap16(pixel));

	if (tlutfmt == GX_TL_RGB5A3)
		return DecodePixel_RGB5A3(Common::swap16(pixel));

	return 0;
}
// One compressed 4x4 sub-block of a CMPR texture. TexDecoder_DecodeTexel overlays this
// struct directly on the raw texture bytes (8 bytes per sub-block), so the field order
// and sizes must stay exactly as they are.
struct DXTBlock
{
// First reference color; byte-swapped with Common::swap16 and unpacked as 5/6/5 bits before use.
u16 color1;
// Second reference color, stored the same way as color1.
u16 color2;
// One byte per texel row; each byte packs four 2-bit color selectors, leftmost texel in the top bits.
u8 lines[4];
};
// Packs four 8-bit channels into one 32-bit value: red in bits 0-7, green in
// bits 8-15, blue in bits 16-23 and alpha in bits 24-31.
static inline u32 MakeRGBA(int r, int g, int b, int a)
{
	const int low_half = r | (g << 8);
	const int high_half = (b << 16) | (a << 24);
	return high_half | low_half;
}
// Decodes the single texel at coordinates (s, t) of a tiled texture and writes it as
// four bytes to dst.
//
//   dst        - receives the decoded texel (4 bytes). Several formats store it with one
//                32-bit write, others byte by byte as dst[0..2] = color, dst[3] = alpha.
//   src        - start of the tiled texture data
//   s, t       - texel coordinates
//   imageWidth - used only to compute the number of blocks per row as
//                (imageWidth >> shift) + 1. NOTE(review): this looks like it expects
//                width - 1, as TexDecoder_DecodeRGBA8FromTmem passes to the TMEM variant -
//                confirm against the callers.
//   tlut_      - palette, read as an array of u16 entries (paletted formats only)
//   tlutfmt    - palette entry format (paletted formats only)
//
// Formats without a case below leave dst untouched; the switch has no default.
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut_, TlutFormat tlutfmt)
{
/* General formula for computing texture offset
//
u16 sBlk = s / blockWidth;
u16 tBlk = t / blockHeight;
u16 widthBlks = (width / blockWidth) + 1;
u32 base = (tBlk * widthBlks + sBlk) * blockWidth * blockHeight;
u16 blkS = s & (blockWidth - 1);
u16 blkT = t & (blockHeight - 1);
u32 blkOff = blkT * blockWidth + blkS;
*/
switch (texformat)
{
// 4-bit palette index: 8x8 texel blocks of 32 bytes, two texels per byte.
case GX_TF_C4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 3;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 7;
u32 blkOff = (blkT << 3) + blkS;
// Even texels sit in the high nibble, odd texels in the low nibble.
int rs = (blkOff & 1)?0:4;
u32 offset = base + (blkOff >> 1);
u8 val = (*(src + offset) >> rs) & 0xF;
u16 *tlut = (u16*) tlut_;
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
}
break;
// 4-bit intensity: same addressing as C4; the value is widened to 8 bits and
// written to all four output bytes.
case GX_TF_I4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 3;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 7;
u32 blkOff = (blkT << 3) + blkS;
int rs = (blkOff & 1)?0:4;
u32 offset = base + (blkOff >> 1);
u8 val = (*(src + offset) >> rs) & 0xF;
val = Convert4To8(val);
dst[0] = val;
dst[1] = val;
dst[2] = val;
dst[3] = val;
}
break;
// 8-bit intensity: 8x4 texel blocks of 32 bytes, one byte per texel, written to
// all four output bytes.
case GX_TF_I8:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 3;
u32 blkOff = (blkT << 3) + blkS;
u8 val = *(src + base + blkOff);
dst[0] = val;
dst[1] = val;
dst[2] = val;
dst[3] = val;
}
break;
// 8-bit palette index: same addressing as I8, looked up in the palette.
case GX_TF_C8:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 3;
u32 blkOff = (blkT << 3) + blkS;
u8 val = *(src + base + blkOff);
u16 *tlut = (u16*) tlut_;
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
}
break;
// 4-bit alpha + 4-bit intensity in one byte: same addressing as I8.
case GX_TF_IA4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 3) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5;
u16 blkS = s & 7;
u16 blkT = t & 3;
u32 blkOff = (blkT << 3) + blkS;
u8 val = *(src + base + blkOff);
// High nibble is alpha, low nibble is intensity.
const u8 a = Convert4To8(val>>4);
const u8 l = Convert4To8(val&0xF);
dst[0] = l;
dst[1] = l;
dst[2] = l;
dst[3] = a;
}
break;
// 16-bit intensity/alpha: 4x4 texel blocks. `base` and `blkOff` count texels and are
// doubled to get a byte offset (2 bytes per texel).
case GX_TF_IA8:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
// The value is decoded as read, without a byte swap.
*((u32*)dst) = DecodePixel_IA8(*valAddr);
}
break;
// 14-bit palette index stored in 16 bits: same addressing as IA8.
case GX_TF_C14X2:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
// Byte-swap, then keep the low 14 bits as the palette index.
u16 val = Common::swap16(*valAddr) & 0x3FFF;
u16 *tlut = (u16*) tlut_;
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
}
break;
// 16-bit RGB565: same addressing as IA8; the value is byte-swapped before decoding.
case GX_TF_RGB565:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
*((u32*)dst) = DecodePixel_RGB565(Common::swap16(*valAddr));
}
break;
// 16-bit RGB5A3: same addressing as IA8; the value is byte-swapped before decoding.
case GX_TF_RGB5A3:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 4;
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1;
const u16* valAddr = (u16*)(src + offset);
*((u32*)dst) = DecodePixel_RGB5A3(Common::swap16(*valAddr));
}
break;
// 32-bit RGBA8: 4x4 texel blocks of 64 bytes. Each block holds the 16 alpha/red byte
// pairs first and the 16 green/blue byte pairs 32 bytes later.
case GX_TF_RGBA8:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
u16 widthBlks = (imageWidth >> 2) + 1;
u32 base = (tBlk * widthBlks + sBlk) << 5; // shift by 5 is correct
u16 blkS = s & 3;
u16 blkT = t & 3;
u32 blkOff = (blkT << 2) + blkS;
u32 offset = (base + blkOff) << 1 ;
const u8* valAddr = src + offset;
// Source order is A, R (first half) and G, B (second half); output order is R, G, B, A.
dst[3] = valAddr[0];
dst[0] = valAddr[1];
dst[1] = valAddr[32];
dst[2] = valAddr[33];
}
break;
// Compressed: 8x8 texel blocks, each made of 2x2 sub-blocks of 4x4 texels,
// 8 bytes per sub-block (see DXTBlock).
case GX_TF_CMPR:
{
// Sub-block coordinates (4x4 texels each).
u16 sDxt = s >> 2;
u16 tDxt = t >> 2;
// Block coordinates (2x2 sub-blocks each).
u16 sBlk = sDxt >> 1;
u16 tBlk = tDxt >> 1;
u16 widthBlks = (imageWidth >> 3) + 1;
// `base` and `blkOff` count sub-blocks; the shift by 3 converts to bytes.
u32 base = (tBlk * widthBlks + sBlk) << 2;
u16 blkS = sDxt & 1;
u16 blkT = tDxt & 1;
u32 blkOff = (blkT << 1) + blkS;
u32 offset = (base + blkOff) << 3;
const DXTBlock* dxtBlock = (const DXTBlock*)(src + offset);
// Both reference colors are stored byte-swapped and packed as 5/6/5 bits.
u16 c1 = Common::swap16(dxtBlock->color1);
u16 c2 = Common::swap16(dxtBlock->color2);
int blue1 = Convert5To8(c1 & 0x1F);
int blue2 = Convert5To8(c2 & 0x1F);
int green1 = Convert6To8((c1 >> 5) & 0x3F);
int green2 = Convert6To8((c2 >> 5) & 0x3F);
int red1 = Convert5To8((c1 >> 11) & 0x1F);
int red2 = Convert5To8((c2 >> 11) & 0x1F);
// Position of the texel inside its 4x4 sub-block.
u16 ss = s & 3;
u16 tt = t & 3;
// Pick the 2-bit selector for this texel: one byte per row, leftmost texel in the top bits.
int colorSel = dxtBlock->lines[tt];
int rs = 6 - (ss << 1);
colorSel = (colorSel >> rs) & 3;
// Bit 2 marks the mode used when color1 is not greater than color2.
colorSel |= c1 > c2?0:4;
u32 color = 0;
switch (colorSel)
{
// Selector 0 in either mode: color1.
case 0:
case 4:
color = MakeRGBA(red1, green1, blue1, 255);
break;
// Selector 1 in either mode: color2.
case 1:
case 5:
color = MakeRGBA(red2, green2, blue2, 255);
break;
// color1 > color2, selector 2: one third of the way from color1 to color2.
case 2:
color = MakeRGBA(red1+(red2-red1)/3, green1+(green2-green1)/3, blue1+(blue2-blue1)/3, 255);
break;
// color1 > color2, selector 3: one third of the way from color2 to color1.
case 3:
color = MakeRGBA(red2+(red1-red2)/3, green2+(green1-green2)/3, blue2+(blue1-blue2)/3, 255);
break;
// color1 <= color2, selector 2: average of both colors, rounded up.
case 6:
color = MakeRGBA((int)ceil((float)(red1+red2)/2), (int)ceil((float)(green1+green2)/2), (int)ceil((float)(blue1+blue2)/2), 255);
break;
// color1 <= color2, selector 3: color2's RGB with alpha 0.
case 7:
color = MakeRGBA(red2, green2, blue2, 0);
break;
default:
color = 0;
break;
}
*((u32*)dst) = color;
}
break;
}
}
// Decodes the single RGBA8 texel at (s, t) from two separate source planes and writes
// it to dst as R, G, B, A bytes.
//
//   src_ar     - plane holding the alpha/red byte pairs
//   src_gb     - plane holding the green/blue byte pairs
//   imageWidth - used to compute the number of 4-texel blocks per row as
//                (imageWidth >> 2) + 1. TexDecoder_DecodeRGBA8FromTmem passes width - 1,
//                which makes that expression the block count rounded up.
//
// Both planes use the same layout: 4x4 texel blocks, two bytes per texel.
void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth)
{
	// Which block the texel is in, and where inside that block.
	const u16 block_col = s >> 2;
	const u16 block_row = t >> 2;
	const u16 blocks_per_row = (imageWidth >> 2) + 1;
	const u16 in_block_s = s & 3;
	const u16 in_block_t = t & 3;

	// Texel index (16 texels per block), doubled to get the byte offset in either plane.
	const u32 block_base = (block_row * blocks_per_row + block_col) << 4;
	const u32 texel_in_block = (in_block_t << 2) + in_block_s;
	const u32 byte_offset = (block_base + texel_in_block) << 1;

	const u8* ar = src_ar + byte_offset;
	const u8* gb = src_gb + byte_offset;

	dst[3] = ar[0]; // A
	dst[0] = ar[1]; // R
	dst[1] = gb[0]; // G
	dst[2] = gb[1]; // B
}
// Decodes a width x height RGBA8 texture from its two source planes into dst, row by
// row, four output bytes per texel. Every texel goes through
// TexDecoder_DecodeTexelRGBA8FromTmem, which is passed width - 1 as its imageWidth.
void TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height)
{
	// TODO for someone who cares: Make this less slow!
	const int image_width = width - 1;

	for (int row = 0; row < height; row++)
	{
		for (int col = 0; col < width; col++, dst += 4)
			TexDecoder_DecodeTexelRGBA8FromTmem(dst, src_ar, src_gb, col, row, image_width);
	}
}