Merge branch 'immediate-removal' into GLSL-master

Conflicts:
	Source/Core/VideoCommon/Src/PixelShaderGen.cpp
	Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.cpp

immediate-removal is a newly created branch separated from master, in which the revert of immediate-removal has itself been reverted,
so we get fewer conflicts by merging
This commit is contained in:
degasus
2013-01-24 16:58:28 +01:00
338 changed files with 40275 additions and 44293 deletions

View File

@ -55,6 +55,11 @@ void GetBPRegInfo(const u8* data, char* name, size_t name_size, char* desc, size
snprintf(name, name_size, #reg); \
(void)(reg);
case BPMEM_GENMODE: // 0x00
SetRegName(BPMEM_GENMODE);
// TODO: Description
break;
case BPMEM_DISPLAYCOPYFILER: // 0x01
// TODO: This is actually the sample pattern used for copies from an antialiased EFB
SetRegName(BPMEM_DISPLAYCOPYFILER);
@ -75,6 +80,28 @@ void GetBPRegInfo(const u8* data, char* name, size_t name_size, char* desc, size
}
break;
case BPMEM_BLENDMODE: // 0x41
{
SetRegName(BPMEM_BLENDMODE);
BlendMode mode; mode.hex = cmddata;
const char* dstfactors[] = { "0", "1", "src_color", "1-src_color", "src_alpha", "1-src_alpha", "dst_alpha", "1-dst_alpha" };
const char* srcfactors[] = { "0", "1", "dst_color", "1-dst_color", "src_alpha", "1-src_alpha", "dst_alpha", "1-dst_alpha" };
const char* logicmodes[] = { "0", "s & d", "s & ~d", "s", "~s & d", "d", "s ^ d", "s | d", "~(s | d)", "~(s ^ d)", "~d", "s | ~d", "~s", "~s | d", "~(s & d)", "1" };
snprintf(desc, desc_size, "Enable: %s\n"
"Logic ops: %s\n"
"Dither: %s\n"
"Color write: %s\n"
"Alpha write: %s\n"
"Dest factor: %s\n"
"Source factor: %s\n"
"Subtract: %s\n"
"Logic mode: %s\n",
no_yes[mode.blendenable], no_yes[mode.logicopenable], no_yes[mode.dither],
no_yes[mode.colorupdate], no_yes[mode.alphaupdate], dstfactors[mode.dstfactor],
srcfactors[mode.srcfactor], no_yes[mode.subtract], logicmodes[mode.logicmode]);
}
break;
case BPMEM_EFB_BR: // 0x4A
{
// TODO: Misleading name, should be BPMEM_EFB_WH instead
@ -148,6 +175,147 @@ void GetBPRegInfo(const u8* data, char* name, size_t name_size, char* desc, size
// TODO: Description
break;
#undef SET_REG_NAME
// TEV color combiner registers. The case labels cover every even offset from
// BPMEM_TEV_COLOR_ENV (0, 2, ..., 30), one register per TEV stage; the stage
// index printed below is therefore (register - base) / 2.
case BPMEM_TEV_COLOR_ENV: // 0xC0
case BPMEM_TEV_COLOR_ENV+2:
case BPMEM_TEV_COLOR_ENV+4:
case BPMEM_TEV_COLOR_ENV+6: // was missing: stage 3 fell out of this case group
case BPMEM_TEV_COLOR_ENV+8:
case BPMEM_TEV_COLOR_ENV+10:
case BPMEM_TEV_COLOR_ENV+12:
case BPMEM_TEV_COLOR_ENV+14:
case BPMEM_TEV_COLOR_ENV+16:
case BPMEM_TEV_COLOR_ENV+18:
case BPMEM_TEV_COLOR_ENV+20:
case BPMEM_TEV_COLOR_ENV+22:
case BPMEM_TEV_COLOR_ENV+24:
case BPMEM_TEV_COLOR_ENV+26:
case BPMEM_TEV_COLOR_ENV+28:
case BPMEM_TEV_COLOR_ENV+30:
	{
		SetRegName(BPMEM_TEV_COLOR_ENV);
		// Reinterpret the raw command payload as the color combiner bitfields.
		TevStageCombiner::ColorCombiner cc; cc.hex = cmddata;
		// Human-readable names for the a/b/c/d input selectors (16 entries).
		const char* tevin[] =
		{
			"prev.rgb", "prev.aaa",
			"c0.rgb", "c0.aaa",
			"c1.rgb", "c1.aaa",
			"c2.rgb", "c2.aaa",
			"tex.rgb", "tex.aaa",
			"ras.rgb", "ras.aaa",
			"ONE", "HALF", "konst.rgb", "ZERO",
		};
		const char* tevbias[] = { "0", "+0.5", "-0.5", "compare" };
		const char* tevop[] = { "add", "sub" };
		const char* tevscale[] = { "1", "2", "4", "0.5" };
		const char* tevout[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
		snprintf(desc, desc_size, "tev stage: %d\n"
		                          "a: %s\n"
		                          "b: %s\n"
		                          "c: %s\n"
		                          "d: %s\n"
		                          "bias: %s\n"
		                          "op: %s\n"
		                          "clamp: %s\n"
		                          "scale factor: %s\n"
		                          "dest: %s\n",
		         (data[0] - BPMEM_TEV_COLOR_ENV)/2, tevin[cc.a], tevin[cc.b], tevin[cc.c], tevin[cc.d],
		         tevbias[cc.bias], tevop[cc.op], no_yes[cc.clamp], tevscale[cc.shift], tevout[cc.dest]);
		break;
	}
case BPMEM_TEV_ALPHA_ENV: // 0xC1
case BPMEM_TEV_ALPHA_ENV+2:
case BPMEM_TEV_ALPHA_ENV+4:
case BPMEM_TEV_ALPHA_ENV+6:
case BPMEM_TEV_ALPHA_ENV+8:
case BPMEM_TEV_ALPHA_ENV+10:
case BPMEM_TEV_ALPHA_ENV+12:
case BPMEM_TEV_ALPHA_ENV+14:
case BPMEM_TEV_ALPHA_ENV+16:
case BPMEM_TEV_ALPHA_ENV+18:
case BPMEM_TEV_ALPHA_ENV+20:
case BPMEM_TEV_ALPHA_ENV+22:
case BPMEM_TEV_ALPHA_ENV+24:
case BPMEM_TEV_ALPHA_ENV+26:
case BPMEM_TEV_ALPHA_ENV+28:
case BPMEM_TEV_ALPHA_ENV+30:
{
SetRegName(BPMEM_TEV_ALPHA_ENV);
TevStageCombiner::AlphaCombiner ac; ac.hex = cmddata;
const char* tevin[] =
{
"prev", "c0", "c1", "c2",
"tex", "ras", "konst", "ZERO",
};
const char* tevbias[] = { "0", "+0.5", "-0.5", "compare" };
const char* tevop[] = { "add", "sub" };
const char* tevscale[] = { "1", "2", "4", "0.5" };
const char* tevout[] = { "prev", "c0", "c1", "c2" };
snprintf(desc, desc_size, "tev stage: %d\n"
"a: %s\n"
"b: %s\n"
"c: %s\n"
"d: %s\n"
"bias: %s\n"
"op: %s\n"
"clamp: %s\n"
"scale factor: %s\n"
"dest: %s\n"
"ras sel: %d\n"
"tex sel: %d\n",
(data[0] - BPMEM_TEV_ALPHA_ENV)/2, tevin[ac.a], tevin[ac.b], tevin[ac.c], tevin[ac.d],
tevbias[ac.bias], tevop[ac.op], no_yes[ac.clamp], tevscale[ac.shift], tevout[ac.dest],
ac.rswap, ac.tswap);
break;
}
case BPMEM_ALPHACOMPARE: // 0xF3
{
SetRegName(BPMEM_ALPHACOMPARE);
AlphaTest test; test.hex = cmddata;
const char* functions[] = { "NEVER", "LESS", "EQUAL", "LEQUAL", "GREATER", "NEQUAL", "GEQUAL", "ALWAYS" };
const char* logic[] = { "AND", "OR", "XOR", "XNOR" };
snprintf(desc, desc_size, "test 1: %s (ref: %#02x)\n"
"test 2: %s (ref: %#02x)\n"
"logic: %s\n",
functions[test.comp0], test.ref0, functions[test.comp1], test.ref1, logic[test.logic]);
break;
}
#undef SetRegName
}
}
// Tries to decide the alpha test purely from the two compare functions
// (comp0, comp1) and the combining logic op; no reference or alpha values are
// consulted. Returns PASS or FAIL when the outcome is fixed by those fields,
// and UNDETERMINED in every other case.
AlphaTest::TEST_RESULT AlphaTest::TestResult()
{
	const bool always0 = (comp0 == ALPHACMP_ALWAYS);
	const bool always1 = (comp1 == ALPHACMP_ALWAYS);
	const bool never0 = (comp0 == ALPHACMP_NEVER);
	const bool never1 = (comp1 == ALPHACMP_NEVER);

	// Both sub-tests are constant: either with opposite or with equal outcomes.
	const bool fixed_opposite = (always0 && never1) || (never0 && always1);
	const bool fixed_equal = (always0 && always1) || (never0 && never1);

	if (logic == 0) // AND
	{
		if (always0 && always1)
			return PASS;
		if (never0 || never1)
			return FAIL;
	}
	else if (logic == 1) // OR
	{
		if (always0 || always1)
			return PASS;
		if (never0 && never1)
			return FAIL;
	}
	else if (logic == 2) // XOR
	{
		if (fixed_opposite)
			return PASS;
		if (fixed_equal)
			return FAIL;
	}
	else if (logic == 3) // XNOR
	{
		if (fixed_opposite)
			return FAIL;
		if (fixed_equal)
			return PASS;
	}

	return UNDETERMINED;
}

View File

@ -793,7 +793,7 @@ union PE_CONTROL
{
u32 pixel_format : 3; // PIXELFMT_X
u32 zformat : 3; // Z Compression for 16bit Z format
u32 zcomploc : 1; // 1: before tex stage
u32 early_ztest : 1; // 1: before tex stage
u32 unused : 17;
u32 rid : 8;
};
@ -857,7 +857,7 @@ union TevKSel
int getKA(int i) {return i?kasel1:kasel0;}
};
union AlphaFunc
union AlphaTest
{
struct
{
@ -868,6 +868,15 @@ union AlphaFunc
u32 logic : 2;
};
u32 hex;
enum TEST_RESULT
{
UNDETERMINED = 0,
FAIL = 1,
PASS = 2,
};
TEST_RESULT TestResult();
};
union UPE_Copy
@ -981,7 +990,7 @@ struct BPMemory
TevReg tevregs[4]; //0xE0
FogRangeParams fogRange;
FogParams fog; //0xEE,0xEF,0xF0,0xF1,0xF2
AlphaFunc alphaFunc; //0xF3
AlphaTest alpha_test; //0xF3
ZTex1 ztex1; //0xf4,0xf5
ZTex2 ztex2;
TevKSel tevksel[8];//0xf6,0xf7,f8,f9,fa,fb,fc,fd

View File

@ -359,9 +359,9 @@ void BPWritten(const BPCmd& bp)
PixelShaderManager::SetFogColorChanged();
break;
case BPMEM_ALPHACOMPARE: // Compare Alpha Values
PRIM_LOG("alphacmp: ref0=%d, ref1=%d, comp0=%d, comp1=%d, logic=%d", bpmem.alphaFunc.ref0,
bpmem.alphaFunc.ref1, bpmem.alphaFunc.comp0, bpmem.alphaFunc.comp1, bpmem.alphaFunc.logic);
PixelShaderManager::SetAlpha(bpmem.alphaFunc);
PRIM_LOG("alphacmp: ref0=%d, ref1=%d, comp0=%d, comp1=%d, logic=%d", bpmem.alpha_test.ref0,
bpmem.alpha_test.ref1, bpmem.alpha_test.comp0, bpmem.alpha_test.comp1, bpmem.alpha_test.logic);
PixelShaderManager::SetAlpha(bpmem.alpha_test);
break;
case BPMEM_BIAS: // BIAS
PRIM_LOG("ztex bias=0x%x", bpmem.ztex1.bias);
@ -501,18 +501,40 @@ void BPWritten(const BPCmd& bp)
// if this is different from 0, manual TMEM management is used (GX_PreloadEntireTexture).
if (bp.newvalue != 0)
{
// NOTE(neobrain): Apparently tmemodd doesn't affect hardware behavior at all (libogc uses it just as a buffer and switches its contents with tmemeven whenever this is called)
// TODO: Not quite sure if this is completely correct (likely not)
// NOTE: libogc's implementation of GX_PreloadEntireTexture seems flawed, so it's not necessarily a good reference for RE'ing this feature.
BPS_TmemConfig& tmem_cfg = bpmem.tmem_config;
u8* ram_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5);
u32 tmem_addr = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE;
u32 size = tmem_cfg.preload_tile_info.count * 32;
u8* src_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5); // TODO: Should we add mask here on GC?
u32 size = tmem_cfg.preload_tile_info.count * TMEM_LINE_SIZE;
u32 tmem_addr_even = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE;
// Check if the game has overflowed TMEM, and copy up to the limit.
// Paper Mario does this when entering the Great Boogly Tree (Chap 2)
if ((tmem_addr + size) > TMEM_SIZE)
size = TMEM_SIZE - tmem_addr;
if (tmem_cfg.preload_tile_info.type != 3)
{
if (tmem_addr_even + size > TMEM_SIZE)
size = TMEM_SIZE - tmem_addr_even;
memcpy(texMem + tmem_addr, ram_ptr, size);
memcpy(texMem + tmem_addr_even, src_ptr, size);
}
else // RGBA8 tiles (and CI14, but that might just be stupid libogc!)
{
	// AR and GB tiles are stored in separate TMEM banks => can't use a single memcpy for everything
	u32 tmem_addr_odd = tmem_cfg.preload_tmem_odd * TMEM_LINE_SIZE;

	// Each iteration consumes two source lines: the first goes to the "even"
	// destination, the second to the "odd" destination.
	for (u32 i = 0; i < tmem_cfg.preload_tile_info.count; ++i)
	{
		// Stop as soon as EITHER destination line would run past the end of
		// TMEM. (The check used to test tmem_addr_even twice, leaving the
		// odd-bank write unguarded.)
		if (tmem_addr_even + TMEM_LINE_SIZE > TMEM_SIZE ||
		    tmem_addr_odd + TMEM_LINE_SIZE > TMEM_SIZE)
			break;

		memcpy(texMem + tmem_addr_even, src_ptr, TMEM_LINE_SIZE);
		memcpy(texMem + tmem_addr_odd, src_ptr + TMEM_LINE_SIZE, TMEM_LINE_SIZE);
		tmem_addr_even += TMEM_LINE_SIZE;
		tmem_addr_odd += TMEM_LINE_SIZE;
		src_ptr += TMEM_LINE_SIZE * 2;
	}
}
}
break;

View File

@ -27,9 +27,8 @@
#include "VideoConfig.h"
#include "NativeVertexFormat.h"
static int AlphaPreTest();
static void StageHash(int stage, u32* out)
static void StageHash(u32 stage, u32* out)
{
out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24
u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now
@ -109,29 +108,17 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 compo
uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
uid->values[0] |= dstAlphaMode << 8; // 2
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth;
uid->values[0] |= DepthTextureEnable << 10; // 1
bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
uid->values[0] |= enablePL << 11; // 1
uid->values[0] |= enablePL << 10; // 1
if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4
u32 alphaPreTest = AlphaPreTest()+1;
if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 11; // 4
uid->values[0] |= alphaPreTest << 16; // 2
if (alphaPreTest == 1 || (alphaPreTest && !DepthTextureEnable && dstAlphaMode == DSTALPHA_ALPHA_PASS))
{
// Courtesy of PreAlphaTest, we're done already ;)
// NOTE: The comment header of generated shaders depends on the value of bpmem.genmode.numindstages.. shouldnt really bother about that though.
uid->num_values = 1;
return;
}
AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult();
uid->values[0] |= alphaPreTest << 15; // 2
// numtexgens should be <= 8
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i)
uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1
uid->values[0] |= xfregs.texMtxInfo[i].projection << (17+i); // 1
uid->values[1] = bpmem.genMode.numindstages; // 3
u32 indirectStagesUsed = 0;
@ -154,30 +141,24 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 compo
}
u32* ptr = &uid->values[2];
for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i)
{
StageHash(i, ptr);
ptr += 4; // max: ptr = &uid->values[66]
}
ptr[0] |= bpmem.alphaFunc.comp0; // 3
ptr[0] |= bpmem.alphaFunc.comp1 << 3; // 3
ptr[0] |= bpmem.alphaFunc.logic << 6; // 2
ptr[0] |= bpmem.alpha_test.comp0; // 3
ptr[0] |= bpmem.alpha_test.comp1 << 3; // 3
ptr[0] |= bpmem.alpha_test.logic << 6; // 2
if (alphaPreTest == 0 || alphaPreTest == 2)
{
ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 8; // 3
if (DepthTextureEnable)
{
ptr[0] |= bpmem.ztex2.op << 11; // 2
ptr[0] |= bpmem.zcontrol.zcomploc << 13; // 1
ptr[0] |= bpmem.zmode.testenable << 14; // 1
ptr[0] |= bpmem.zmode.updateenable << 15; // 1
}
}
ptr[0] |= bpmem.ztex2.op << 8; // 2
ptr[0] |= bpmem.zcontrol.early_ztest << 10; // 1
ptr[0] |= bpmem.zmode.testenable << 11; // 1
ptr[0] |= bpmem.zmode.updateenable << 12; // 1
if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
{
ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 13; // 3
if (bpmem.fog.c_proj_fsel.fsel != 0)
{
ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1
@ -204,9 +185,8 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u
*ptr++ = bpmem.ztex2.hex; // 2
*ptr++ = bpmem.zcontrol.hex; // 3
*ptr++ = bpmem.zmode.hex; // 4
*ptr++ = g_ActiveConfig.bEnablePerPixelDepth; // 5
*ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6
*ptr++ = xfregs.numTexGen.hex; // 7
*ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 5
*ptr++ = xfregs.numTexGen.hex; // 6
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
@ -218,28 +198,28 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u
}
for (unsigned int i = 0; i < 8; ++i)
*ptr++ = xfregs.texMtxInfo[i].hex; // 8-15
*ptr++ = xfregs.texMtxInfo[i].hex; // 7-14
for (unsigned int i = 0; i < 16; ++i)
*ptr++ = bpmem.tevind[i].hex; // 16-31
*ptr++ = bpmem.tevind[i].hex; // 15-30
*ptr++ = bpmem.tevindref.hex; // 32
*ptr++ = bpmem.tevindref.hex; // 31
for (u32 i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times
for (u32 i = 0; i < bpmem.genMode.numtevstages+1u; ++i) // up to 16 times
{
*ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i
*ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i
*ptr++ = bpmem.tevind[i].hex; // 35+5*i
*ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i
*ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i
*ptr++ = bpmem.combiners[i].colorC.hex; // 32+5*i
*ptr++ = bpmem.combiners[i].alphaC.hex; // 33+5*i
*ptr++ = bpmem.tevind[i].hex; // 34+5*i
*ptr++ = bpmem.tevksel[i/2].hex; // 35+5*i
*ptr++ = bpmem.tevorders[i/2].hex; // 36+5*i
}
ptr = &uid->values[113];
ptr = &uid->values[112];
*ptr++ = bpmem.alphaFunc.hex; // 113
*ptr++ = bpmem.alpha_test.hex; // 112
*ptr++ = bpmem.fog.c_proj_fsel.hex; // 114
*ptr++ = bpmem.fogRange.Base.hex; // 115
*ptr++ = bpmem.fog.c_proj_fsel.hex; // 113
*ptr++ = bpmem.fogRange.Base.hex; // 114
_assert_((ptr - uid->values) == uid->GetNumValues());
}
@ -308,10 +288,10 @@ static const char *tevKSelTableC[] = // KCSEL
"0.375f,0.375f,0.375f", // 3_8 = 0x05
"0.25f,0.25f,0.25f", // 1_4 = 0x06
"0.125f,0.125f,0.125f", // 1_8 = 0x07
"ERROR", // 0x08
"ERROR", // 0x09
"ERROR", // 0x0a
"ERROR", // 0x0b
"ERROR1", // 0x08
"ERROR2", // 0x09
"ERROR3", // 0x0a
"ERROR4", // 0x0b
I_KCOLORS"[0].rgb", // K0 = 0x0C
I_KCOLORS"[1].rgb", // K1 = 0x0D
I_KCOLORS"[2].rgb", // K2 = 0x0E
@ -344,14 +324,14 @@ static const char *tevKSelTableA[] = // KASEL
"0.375f",// 3_8 = 0x05
"0.25f", // 1_4 = 0x06
"0.125f",// 1_8 = 0x07
"ERROR", // 0x08
"ERROR", // 0x09
"ERROR", // 0x0a
"ERROR", // 0x0b
"ERROR", // 0x0c
"ERROR", // 0x0d
"ERROR", // 0x0e
"ERROR", // 0x0f
"ERROR5", // 0x08
"ERROR6", // 0x09
"ERROR7", // 0x0a
"ERROR8", // 0x0b
"ERROR9", // 0x0c
"ERROR10", // 0x0d
"ERROR11", // 0x0e
"ERROR12", // 0x0f
I_KCOLORS"[0].r", // K0_R = 0x10
I_KCOLORS"[1].r", // K1_R = 0x11
I_KCOLORS"[2].r", // K2_R = 0x12
@ -426,7 +406,7 @@ static const char *tevCInputTable[] = // CC
"float3(0.5f, 0.5f, 0.5f)", // HALF
"(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST
"float3(0.0f, 0.0f, 0.0f)", // ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR"
"PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4"
};
static const char *tevAInputTable[] = // CA
@ -448,17 +428,17 @@ static const char *tevAInputTable[] = // CA
"crastemp", // RASA,
"ckonsttemp", // KONST, (hw1 had quarter)
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8",
"PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12",
};
static const char *tevRasTable[] =
{
"colors_0",
"colors_1",
"ERROR", //2
"ERROR", //3
"ERROR", //4
"ERROR13", //2
"ERROR14", //3
"ERROR15", //4
"alphabump", // use bump alpha
"(alphabump*(255.0f/248.0f))", //normalized
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero
@ -481,7 +461,6 @@ static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
static char swapModeTable[4][5];
static char text[16384];
static bool DepthTextureEnable;
struct RegisterState
{
@ -546,7 +525,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
}
DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ;
if (ApiType == API_OPENGL)
{
@ -639,8 +617,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "out float4 ocol1;\n");
if (DepthTextureEnable)
WRITE(p, "float depth;\n");
WRITE(p, "float depth;\n");
WRITE(p, "float4 rawpos = gl_FragCoord;\n");
WRITE(p, "VARYIN float4 colors_02;\n");
@ -695,14 +672,14 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{
WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n",
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "",
DepthTextureEnable ? "\n out float depth : DEPTH," : "",
"\n out float depth : DEPTH,",
ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS");
}
else
{
WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
DepthTextureEnable ? "\n out float depth : SV_Depth," : "");
"\n out float depth : SV_Depth,");
}
WRITE(p, " in float4 colors_0 : COLOR0,\n");
@ -736,45 +713,14 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
}
}
int Pretest = AlphaPreTest();
if(Pretest >= 0 && !DepthTextureEnable)
{
if (!Pretest)
{
// alpha test will always fail, so restart the shader and just make it an empty function
WRITE(p, "\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
if(DepthTextureEnable)
WRITE(p, "\tdepth = 1.f;\n");
if(dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
if (ApiType == API_OPENGL)
{
if (DepthTextureEnable)
WRITE(p, "\tgl_FragDepth = depth;\n");
}
WRITE(p, "\tdiscard;\n");
if(ApiType != API_D3D11)
WRITE(p, "\treturn;\n");
}
else if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
{
WRITE(p, "\tocol0 = " I_ALPHA"[0].aaaa;\n");
}
if(!Pretest || dstAlphaMode == DSTALPHA_ALPHA_PASS)
{
WRITE(p, "}\n");
return text;
}
}
WRITE(p, "\tfloat4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
"\tfloat3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
"\tfloat4 alphabump=float4(0.0f,0.0f,0.0f,0.0f);\n"
"\tfloat3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n"
"\tfloat2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n"
"\tfloat4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n"
"\tfloat4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n"
"\tfloat4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n");
WRITE(p, " float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
" float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
" float4 alphabump=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n"
" float2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n"
" float4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n");
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
@ -874,34 +820,26 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
WRITE(p, "\tprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if(Pretest == -1)
{
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
if (Pretest == AlphaTest::UNDETERMINED)
WriteAlphaTest(p, ApiType, dstAlphaMode);
}
if((bpmem.fog.c_proj_fsel.fsel != 0) || DepthTextureEnable)
{
// the screen space depth value = far z + (clip z / clip w) * z range
WRITE(p, "\tfloat zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
}
// the screen space depth value = far z + (clip z / clip w) * z range
WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
if (DepthTextureEnable)
// Note: depth textures are disabled if early depth test is enabled
if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable)
{
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable)
{
if (bpmem.ztex2.op == ZTEXTURE_ADD)
WRITE(p, "\tzCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w + zCoord;\n");
else
WRITE(p, "\tzCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w;\n");
WRITE(p, "zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
(bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : "");
// scale to make result from frac correct
WRITE(p, "\tzCoord = zCoord * (16777215.0f/16777216.0f);\n");
WRITE(p, "\tzCoord = frac(zCoord);\n");
WRITE(p, "\tzCoord = zCoord * (16777216.0f/16777215.0f);\n");
}
WRITE(p, "\tdepth = zCoord;\n");
// scale to make result from frac correct
WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
WRITE(p, "zCoord = frac(zCoord);\n");
WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
}
WRITE(p, "depth = zCoord;\n");
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
WRITE(p, "\tocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n");
@ -923,8 +861,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
if (ApiType == API_OPENGL)
{
if (DepthTextureEnable)
WRITE(p, "\tgl_FragDepth = depth;\n");
WRITE(p, "\tgl_FragDepth = depth;\n");
}
WRITE(p, "}\n");
if (text[sizeof(text) - 1] != 0x7C)
@ -1316,39 +1253,6 @@ static const char *tevAlphaFunclogicTable[] =
" != ", // xor
" == " // xnor
};
static int AlphaPreTest()
{
u32 op = bpmem.alphaFunc.logic;
u32 comp[2] = {bpmem.alphaFunc.comp0, bpmem.alphaFunc.comp1};
// First kill all the simple cases
switch(op)
{
case 0: // AND
if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) return false;
break;
case 1: // OR
if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)return false;
break;
case 2: // XOR
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS))
return true;
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER))
return false;
break;
case 3: // XNOR
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS))
return false;
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER))
return true;
break;
default: PanicAlert("bad logic for alpha test? %08x", op);
}
return -1;
}
static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode)
{
@ -1362,41 +1266,38 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode
// using discard then return works the same in cg and dx9 but not in dx11
WRITE(p, "\tif(!( ");
int compindex = bpmem.alphaFunc.comp0 % 8;
int compindex = bpmem.alpha_test.comp0;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table
WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);//lookup the logic op
WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op
compindex = bpmem.alphaFunc.comp1 % 8;
compindex = bpmem.alpha_test.comp1;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table
WRITE(p, ")) {\n");
WRITE(p, "\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
if (DepthTextureEnable)
WRITE(p, "\t\tdepth = 1.f;\n");
WRITE(p, "depth = 1.f;\n");
// HAXX: zcomploc is a way to control whether depth test is done before
// or after texturing and alpha test. PC GPU does depth test before texturing ONLY if depth value is
// not updated during shader execution.
// HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
// or after texturing and alpha test. PC GPUs have no way to support this
// feature properly as of 2012: depth buffer and depth test are not
// programmable and the depth test is always done after texturing.
// Most importantly, PC GPUs do not allow writing to the z buffer without
// writing a color value (unless color writing is disabled altogether).
// We implement "depth test before texturing" by discarding the fragment
// when the alpha test fail. This is not a correct implementation because
// even if the depth test fails the fragment could be alpha blended.
// this implemnetation is a trick to keep speed.
// the correct, but slow, way to implement a correct zComploc is :
// 1 - if zcomplock is enebled make a first pass, with color channel write disabled updating only
// depth channel.
// 2 - in the next pass disable depth chanel update, but proccess the color data normally
// this way is the only CORRECT way to emulate perfectly the zcomplock behaviour
if (!(bpmem.zcontrol.zcomploc && bpmem.zmode.updateenable))
// even if the depth test fails the fragment could be alpha blended, but
// we don't have a choice.
if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable))
{
WRITE(p, "\t\tdiscard;\n");
if (ApiType != API_D3D11)
WRITE(p, "\t\treturn;\n");
}
WRITE(p, "\t}\n");
WRITE(p, "}\n");
}
static const char *tevFogFuncsTable[] =

View File

@ -45,7 +45,7 @@
#define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 120
#define PIXELSHADERUID_MAX_VALUES_SAFE 115
// Annoying sure, can be removed once we get up to GLSL ~1.3
const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },

View File

@ -375,7 +375,7 @@ void PixelShaderManager::SetColorChanged(int type, int num, bool high)
PRIM_LOG("pixel %scolor%d: %f %f %f %f\n", type?"k":"", num, pf[0], pf[1], pf[2], pf[3]);
}
void PixelShaderManager::SetAlpha(const AlphaFunc& alpha)
void PixelShaderManager::SetAlpha(const AlphaTest& alpha)
{
if ((alpha.hex & 0xffff) != lastAlpha)
{

View File

@ -38,7 +38,7 @@ public:
// constant management, should be called after memory is committed
static void SetColorChanged(int type, int index, bool high);
static void SetAlpha(const AlphaFunc& alpha);
static void SetAlpha(const AlphaTest& alpha);
static void SetDestAlpha(const ConstantAlpha& alpha);
static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
static void SetZTextureBias(u32 bias);

View File

@ -91,6 +91,9 @@ Renderer::Renderer() : frame_data(NULL), bLastFrameDumped(false)
#if defined _WIN32 || defined HAVE_LIBAV
bAVIDumping = false;
#endif
OSDChoice = 0;
OSDTime = 0;
}
Renderer::~Renderer()
@ -263,108 +266,109 @@ void Renderer::SetScreenshot(const char *filename)
// Create On-Screen-Messages
void Renderer::DrawDebugText()
{
if (!g_Config.bOSDHotKey)
return;
// OSD Menu messages
if (g_ActiveConfig.bOSDHotKey)
if (OSDChoice > 0)
{
if (OSDChoice > 0)
{
OSDTime = Common::Timer::GetTimeMs() + 3000;
OSDChoice = -OSDChoice;
}
if ((u32)OSDTime > Common::Timer::GetTimeMs())
{
const char* res_text = "";
switch (g_ActiveConfig.iEFBScale)
{
case 0:
res_text = "Auto (fractional)";
break;
case 1:
res_text = "Auto (integral)";
break;
case 2:
res_text = "Native";
break;
case 3:
res_text = "1.5x";
break;
case 4:
res_text = "2x";
break;
case 5:
res_text = "2.5x";
break;
case 6:
res_text = "3x";
break;
case 7:
res_text = "4x";
break;
}
const char* ar_text = "";
switch(g_ActiveConfig.iAspectRatio)
{
case ASPECT_AUTO:
ar_text = "Auto";
break;
case ASPECT_FORCE_16_9:
ar_text = "16:9";
break;
case ASPECT_FORCE_4_3:
ar_text = "4:3";
break;
case ASPECT_STRETCH:
ar_text = "Stretch";
break;
}
const char* const efbcopy_text = g_ActiveConfig.bEFBCopyEnable ?
(g_ActiveConfig.bCopyEFBToTexture ? "to Texture" : "to RAM") : "Disabled";
// The rows
const std::string lines[] =
{
std::string("3: Internal Resolution: ") + res_text,
std::string("4: Aspect Ratio: ") + ar_text + (g_ActiveConfig.bCrop ? " (crop)" : ""),
std::string("5: Copy EFB: ") + efbcopy_text,
std::string("6: Fog: ") + (g_ActiveConfig.bDisableFog ? "Disabled" : "Enabled"),
};
enum { lines_count = sizeof(lines)/sizeof(*lines) };
std::string final_yellow, final_cyan;
// If there is more text than this we will have a collision
if (g_ActiveConfig.bShowFPS)
{
final_yellow = final_cyan = "\n\n";
}
// The latest changed setting in yellow
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice == -i - 1)
final_yellow += lines[i];
final_yellow += '\n';
}
// The other settings in cyan
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice != -i - 1)
final_cyan += lines[i];
final_cyan += '\n';
}
// Render a shadow
g_renderer->RenderText(final_cyan.c_str(), 21, 21, 0xDD000000);
g_renderer->RenderText(final_yellow.c_str(), 21, 21, 0xDD000000);
//and then the text
g_renderer->RenderText(final_cyan.c_str(), 20, 20, 0xFF00FFFF);
g_renderer->RenderText(final_yellow.c_str(), 20, 20, 0xFFFFFF00);
}
OSDTime = Common::Timer::GetTimeMs() + 3000;
OSDChoice = -OSDChoice;
}
if ((u32)OSDTime <= Common::Timer::GetTimeMs())
return;
const char* res_text = "";
switch (g_ActiveConfig.iEFBScale)
{
case 0:
res_text = "Auto (fractional)";
break;
case 1:
res_text = "Auto (integral)";
break;
case 2:
res_text = "Native";
break;
case 3:
res_text = "1.5x";
break;
case 4:
res_text = "2x";
break;
case 5:
res_text = "2.5x";
break;
case 6:
res_text = "3x";
break;
case 7:
res_text = "4x";
break;
}
const char* ar_text = "";
switch(g_ActiveConfig.iAspectRatio)
{
case ASPECT_AUTO:
ar_text = "Auto";
break;
case ASPECT_FORCE_16_9:
ar_text = "16:9";
break;
case ASPECT_FORCE_4_3:
ar_text = "4:3";
break;
case ASPECT_STRETCH:
ar_text = "Stretch";
break;
}
const char* const efbcopy_text = g_ActiveConfig.bEFBCopyEnable ?
(g_ActiveConfig.bCopyEFBToTexture ? "to Texture" : "to RAM") : "Disabled";
// The rows
const std::string lines[] =
{
std::string("3: Internal Resolution: ") + res_text,
std::string("4: Aspect Ratio: ") + ar_text + (g_ActiveConfig.bCrop ? " (crop)" : ""),
std::string("5: Copy EFB: ") + efbcopy_text,
std::string("6: Fog: ") + (g_ActiveConfig.bDisableFog ? "Disabled" : "Enabled"),
};
enum { lines_count = sizeof(lines)/sizeof(*lines) };
std::string final_yellow, final_cyan;
// If there is more text than this we will have a collision
if (g_ActiveConfig.bShowFPS)
{
final_yellow = final_cyan = "\n\n";
}
// The latest changed setting in yellow
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice == -i - 1)
final_yellow += lines[i];
final_yellow += '\n';
}
// The other settings in cyan
for (int i = 0; i != lines_count; ++i)
{
if (OSDChoice != -i - 1)
final_cyan += lines[i];
final_cyan += '\n';
}
// Render a shadow
g_renderer->RenderText(final_cyan.c_str(), 21, 21, 0xDD000000);
g_renderer->RenderText(final_yellow.c_str(), 21, 21, 0xDD000000);
//and then the text
g_renderer->RenderText(final_cyan.c_str(), 20, 20, 0xFF00FFFF);
g_renderer->RenderText(final_yellow.c_str(), 20, 20, 0xFFFFFF00);
}
// TODO: remove

View File

@ -39,7 +39,7 @@
// TODO: Move these out of here.
extern int frameCount;
extern int OSDChoice, OSDTime;
extern int OSDChoice;
extern bool bLastFrameDumped;

View File

@ -43,7 +43,7 @@ struct Statistics
int numUniquePixelShaders;
float proj_0, proj_1, proj_2, proj_3, proj_4, proj_5, proj_6;
float proj_0, proj_1, proj_2, proj_3, proj_4, proj_5;
float gproj_0, gproj_1, gproj_2, gproj_3, gproj_4, gproj_5;
float gproj_6, gproj_7, gproj_8, gproj_9, gproj_10, gproj_11, gproj_12, gproj_13, gproj_14, gproj_15;

View File

@ -204,12 +204,16 @@ void TextureCache::ClearRenderTargets()
iter = textures.begin(),
tcend = textures.end();
for (; iter!=tcend; ++iter)
while (iter != tcend)
{
if (iter->second->type == TCET_EC_VRAM)
{
delete iter->second;
textures.erase(iter++);
}
else
++iter;
}
}
bool TextureCache::CheckForCustomTextureLODs(u64 tex_hash, int texformat, unsigned int levels)
@ -329,6 +333,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
if (from_tmem) src_data = &texMem[bpmem.tex[stage/4].texImage1[stage%4].tmem_even * TMEM_LINE_SIZE];
else src_data = Memory::GetPointer(address);
// TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data from the low tmem bank than it should)
tex_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (isPaletteTexture)
{
@ -409,10 +414,19 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
}
}
// TODO: RGBA8 textures are stored non-contiguously in tmem, which might cause problems here when preloading is enabled
if (!using_custom_texture)
pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth,
expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
{
if (!(texformat == GX_TF_RGBA8 && from_tmem))
{
pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth,
expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
}
else
{
u8* src_data_gb = &texMem[bpmem.tex[stage/4].texImage2[stage%4].tmem_odd * TMEM_LINE_SIZE];
pcfmt = TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
}
}
// TODO: Cleanup. Plus, we still autogenerate mipmaps in certain cases (we shouldn't do that)
bool isPow2;

View File

@ -2515,6 +2515,40 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
}
}
// Decodes a single RGBA8 texel whose data is split across two tmem planes.
//
// dst        - receives four bytes in R, G, B, A order.
// src_ar     - plane holding the alpha/red byte pair of every texel.
// src_gb     - plane holding the green/blue byte pair of every texel.
// s, t       - texel coordinates.
// imageWidth - used to derive the number of 4x4 blocks per row. The bulk
//              decoder in this file passes (width - 1), for which the
//              expression below equals ceil(width / 4).
//              TODO: confirm every other caller follows that convention.
void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth)
{
	// 4x4 block containing the texel, and the number of blocks in one row.
	const u16 blockX = s >> 2;
	const u16 blockY = t >> 2;
	const u16 blocksPerRow = (imageWidth >> 2) + 1;

	// Texel position inside its block.
	const u16 inBlockX = s & 3;
	const u16 inBlockY = t & 3;

	// Both planes are indexed identically: 16 texels per block and two bytes
	// per texel in each plane, so one byte offset serves both.
	const u32 blockBase = (blockY * blocksPerRow + blockX) << 4;
	const u32 texelInBlock = (inBlockY << 2) + inBlockX;
	const u32 byteOffset = (blockBase + texelInBlock) << 1;

	const u8* const ar = src_ar + byteOffset;
	const u8* const gb = src_gb + byteOffset;

	dst[3] = ar[0]; // A
	dst[0] = ar[1]; // R
	dst[1] = gb[0]; // G
	dst[2] = gb[1]; // B
}
// Decodes a whole RGBA8 texture whose AR and GB halves live in separate tmem
// planes, one texel at a time.
//
// dst is advanced by four bytes per texel, row by row, so it must have room
// for width * height * 4 bytes. Always returns PC_TEX_FMT_RGBA32.
PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height)
{
	// TODO for someone who cares: Make this less slow!
	u8* out = dst;
	for (int row = 0; row != height && row < height; ++row)
	{
		int col = 0;
		while (col < width)
		{
			// The per-texel decoder is handed (width - 1), not width.
			TexDecoder_DecodeTexelRGBA8FromTmem(out, src_ar, src_gb, col, row, width-1);
			out += 4;
			++col;
		}
	}

	return PC_TEX_FMT_RGBA32;
}
const char* texfmt[] = {
// pixel

View File

@ -86,6 +86,8 @@ enum PC_TexFormat
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt,bool rgbaOnly = false);
PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt);
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt);
void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth);
PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);
#endif

View File

@ -205,7 +205,7 @@ void VertexManager::Flush()
}
PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphafunc=0x%x", bpmem.genMode.numtevstages+1, bpmem.genMode.numindstages,
bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alphaFunc.hex>>16)&0xff);
bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff);
#endif
u32 usedtextures = 0;

View File

@ -542,6 +542,7 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
// Write the true depth value. If the game uses depth textures, pixel shaders will override it with the correct values;
// if not, early Z culling will improve speed.
// TODO: Can probably be dropped?
if (is_d3d)
{
WRITE(p, "o.pos.z = " I_DEPTHPARAMS".x * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y;\n");

View File

@ -169,14 +169,24 @@ void VertexShaderManager::Shutdown()
// Marks every piece of tracked vertex shader state as changed.
//
// Each variable is assigned exactly once; the final values are the same as
// before, only the redundant repeated assignments were dropped.
void VertexShaderManager::Dirty()
{
	// NOTE(review): the [0]/[1] pairs presumably bound the range of changed
	// entries - confirm against the code that consumes them.
	nTransformMatricesChanged[0] = 0;
	nTransformMatricesChanged[1] = 256;

	nNormalMatricesChanged[0] = 0;
	nNormalMatricesChanged[1] = 96;

	nPostTransformMatricesChanged[0] = 0;
	nPostTransformMatricesChanged[1] = 256;

	nLightsChanged[0] = 0;
	nLightsChanged[1] = 0x80;

	bPosNormalMatrixChanged = true;
	bTexMatricesChanged[0] = true;
	bTexMatricesChanged[1] = true;

	// SetConstants() rebuilds the projection matrix when this flag is set.
	bProjectionChanged = true;

	nMaterialsChanged = 15;
}
@ -342,26 +352,28 @@ void VertexShaderManager::SetConstants()
if (bProjectionChanged)
{
bProjectionChanged = false;
float *rawProjection = xfregs.projection.rawProjection;
if (xfregs.rawProjection[6] == 0)
switch(xfregs.projection.type)
{
// Perspective
case GX_PERSPECTIVE:
g_fProjectionMatrix[0] = xfregs.rawProjection[0] * g_ActiveConfig.fAspectRatioHackW;
g_fProjectionMatrix[0] = rawProjection[0] * g_ActiveConfig.fAspectRatioHackW;
g_fProjectionMatrix[1] = 0.0f;
g_fProjectionMatrix[2] = xfregs.rawProjection[1];
g_fProjectionMatrix[2] = rawProjection[1];
g_fProjectionMatrix[3] = 0.0f;
g_fProjectionMatrix[4] = 0.0f;
g_fProjectionMatrix[5] = xfregs.rawProjection[2] * g_ActiveConfig.fAspectRatioHackH;
g_fProjectionMatrix[6] = xfregs.rawProjection[3];
g_fProjectionMatrix[5] = rawProjection[2] * g_ActiveConfig.fAspectRatioHackH;
g_fProjectionMatrix[6] = rawProjection[3];
g_fProjectionMatrix[7] = 0.0f;
g_fProjectionMatrix[8] = 0.0f;
g_fProjectionMatrix[9] = 0.0f;
g_fProjectionMatrix[10] = xfregs.rawProjection[4];
g_fProjectionMatrix[10] = rawProjection[4];
g_fProjectionMatrix[11] = xfregs.rawProjection[5];
g_fProjectionMatrix[11] = rawProjection[5];
g_fProjectionMatrix[12] = 0.0f;
g_fProjectionMatrix[13] = 0.0f;
@ -386,24 +398,24 @@ void VertexShaderManager::SetConstants()
SETSTAT_FT(stats.gproj_13, g_fProjectionMatrix[13]);
SETSTAT_FT(stats.gproj_14, g_fProjectionMatrix[14]);
SETSTAT_FT(stats.gproj_15, g_fProjectionMatrix[15]);
}
else
{
// Orthographic Projection
g_fProjectionMatrix[0] = xfregs.rawProjection[0];
break;
case GX_ORTHOGRAPHIC:
g_fProjectionMatrix[0] = rawProjection[0];
g_fProjectionMatrix[1] = 0.0f;
g_fProjectionMatrix[2] = 0.0f;
g_fProjectionMatrix[3] = xfregs.rawProjection[1];
g_fProjectionMatrix[3] = rawProjection[1];
g_fProjectionMatrix[4] = 0.0f;
g_fProjectionMatrix[5] = xfregs.rawProjection[2];
g_fProjectionMatrix[5] = rawProjection[2];
g_fProjectionMatrix[6] = 0.0f;
g_fProjectionMatrix[7] = xfregs.rawProjection[3];
g_fProjectionMatrix[7] = rawProjection[3];
g_fProjectionMatrix[8] = 0.0f;
g_fProjectionMatrix[9] = 0.0f;
g_fProjectionMatrix[10] = (g_ProjHack1.value + xfregs.rawProjection[4]) * ((g_ProjHack1.sign == 0) ? 1.0f : g_ProjHack1.sign);
g_fProjectionMatrix[11] = (g_ProjHack2.value + xfregs.rawProjection[5]) * ((g_ProjHack2.sign == 0) ? 1.0f : g_ProjHack2.sign);
g_fProjectionMatrix[10] = (g_ProjHack1.value + rawProjection[4]) * ((g_ProjHack1.sign == 0) ? 1.0f : g_ProjHack1.sign);
g_fProjectionMatrix[11] = (g_ProjHack2.value + rawProjection[5]) * ((g_ProjHack2.sign == 0) ? 1.0f : g_ProjHack2.sign);
g_fProjectionMatrix[12] = 0.0f;
g_fProjectionMatrix[13] = 0.0f;
@ -416,7 +428,7 @@ void VertexShaderManager::SetConstants()
*/
g_fProjectionMatrix[14] = 0.0f;
g_fProjectionMatrix[15] = (g_ProjHack3 && xfregs.rawProjection[0] == 2.0f ? 0.0f : 1.0f); //causes either the efb copy or bloom layer not to show if proj hack enabled
g_fProjectionMatrix[15] = (g_ProjHack3 && rawProjection[0] == 2.0f ? 0.0f : 1.0f); //causes either the efb copy or bloom layer not to show if proj hack enabled
SETSTAT_FT(stats.g2proj_0, g_fProjectionMatrix[0]);
SETSTAT_FT(stats.g2proj_1, g_fProjectionMatrix[1]);
@ -434,18 +446,21 @@ void VertexShaderManager::SetConstants()
SETSTAT_FT(stats.g2proj_13, g_fProjectionMatrix[13]);
SETSTAT_FT(stats.g2proj_14, g_fProjectionMatrix[14]);
SETSTAT_FT(stats.g2proj_15, g_fProjectionMatrix[15]);
SETSTAT_FT(stats.proj_0, xfregs.rawProjection[0]);
SETSTAT_FT(stats.proj_1, xfregs.rawProjection[1]);
SETSTAT_FT(stats.proj_2, xfregs.rawProjection[2]);
SETSTAT_FT(stats.proj_3, xfregs.rawProjection[3]);
SETSTAT_FT(stats.proj_4, xfregs.rawProjection[4]);
SETSTAT_FT(stats.proj_5, xfregs.rawProjection[5]);
SETSTAT_FT(stats.proj_6, xfregs.rawProjection[6]);
SETSTAT_FT(stats.proj_0, rawProjection[0]);
SETSTAT_FT(stats.proj_1, rawProjection[1]);
SETSTAT_FT(stats.proj_2, rawProjection[2]);
SETSTAT_FT(stats.proj_3, rawProjection[3]);
SETSTAT_FT(stats.proj_4, rawProjection[4]);
SETSTAT_FT(stats.proj_5, rawProjection[5]);
break;
default:
ERROR_LOG(VIDEO, "unknown projection type: %d", xfregs.projection.type);
}
PRIM_LOG("Projection: %f %f %f %f %f %f\n", xfregs.rawProjection[0], xfregs.rawProjection[1], xfregs.rawProjection[2], xfregs.rawProjection[3], xfregs.rawProjection[4], xfregs.rawProjection[5]);
PRIM_LOG("Projection: %f %f %f %f %f %f\n", rawProjection[0], rawProjection[1], rawProjection[2], rawProjection[3], rawProjection[4], rawProjection[5]);
if ((g_ActiveConfig.bFreeLook || g_ActiveConfig.bAnaglyphStereo ) && xfregs.rawProjection[6] == 0)
if ((g_ActiveConfig.bFreeLook || g_ActiveConfig.bAnaglyphStereo ) && xfregs.projection.type == GX_PERSPECTIVE)
{
Matrix44 mtxA;
Matrix44 mtxB;

View File

@ -75,7 +75,6 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Settings", "AnaglyphStereoSeparation", &iAnaglyphStereoSeparation, 200);
iniFile.Get("Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle, 0);
iniFile.Get("Settings", "EnablePixelLighting", &bEnablePixelLighting, 0);
iniFile.Get("Settings", "EnablePerPixelDepth", &bEnablePerPixelDepth, 0);
iniFile.Get("Settings", "MSAA", &iMultisampleMode, 0);
iniFile.Get("Settings", "EFBScale", &iEFBScale, 2); // native
@ -134,7 +133,6 @@ void VideoConfig::GameIniLoad(const char *ini_file)
iniFile.GetIfExists("Video_Settings", "AnaglyphStereoSeparation", &iAnaglyphStereoSeparation);
iniFile.GetIfExists("Video_Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle);
iniFile.GetIfExists("Video_Settings", "EnablePixelLighting", &bEnablePixelLighting);
iniFile.GetIfExists("Video_Settings", "EnablePerPixelDepth", &bEnablePerPixelDepth);
iniFile.GetIfExists("Video_Settings", "MSAA", &iMultisampleMode);
iniFile.GetIfExists("Video_Settings", "EFBScale", &iEFBScale); // integral
iniFile.GetIfExists("Video_Settings", "DstAlphaPass", &bDstAlphaPass);
@ -204,7 +202,6 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Settings", "AnaglyphStereoSeparation", iAnaglyphStereoSeparation);
iniFile.Set("Settings", "AnaglyphFocalAngle", iAnaglyphFocalAngle);
iniFile.Set("Settings", "EnablePixelLighting", bEnablePixelLighting);
iniFile.Set("Settings", "EnablePerPixelDepth", bEnablePerPixelDepth);
iniFile.Set("Settings", "ShowEFBCopyRegions", bShowEFBCopyRegions);
@ -270,7 +267,6 @@ void VideoConfig::GameIniSave(const char* default_ini, const char* game_ini)
SET_IF_DIFFERS("Video_Settings", "AnaglyphStereoSeparation", iAnaglyphStereoSeparation);
SET_IF_DIFFERS("Video_Settings", "AnaglyphFocalAngle", iAnaglyphFocalAngle);
SET_IF_DIFFERS("Video_Settings", "EnablePixelLighting", bEnablePixelLighting);
SET_IF_DIFFERS("Video_Settings", "EnablePerPixelDepth", bEnablePerPixelDepth);
SET_IF_DIFFERS("Video_Settings", "MSAA", iMultisampleMode);
SET_IF_DIFFERS("Video_Settings", "EFBScale", iEFBScale); // integral
SET_IF_DIFFERS("Video_Settings", "DstAlphaPass", bDstAlphaPass);

View File

@ -133,7 +133,6 @@ struct VideoConfig
bool bZTPSpeedHack; // The Legend of Zelda: Twilight Princess
bool bUseBBox;
bool bEnablePixelLighting;
bool bEnablePerPixelDepth;
int iLog; // CONF_ bits
int iSaveTargetId; // TODO: Should be dropped

View File

@ -61,6 +61,9 @@
#define LIGHTATTN_NONE 2
#define LIGHTATTN_DIR 3
#define GX_PERSPECTIVE 0
#define GX_ORTHOGRAPHIC 1
#define XFMEM_SIZE 0x8000
#define XFMEM_POSMATRICES 0x000
#define XFMEM_POSMATRICES_END 0x100
@ -235,6 +238,12 @@ struct Viewport
float farZ;
};
// Projection parameters as they sit in XF register memory. XFRegisters places
// this struct at 0x1020 - 0x1026: six float coefficients followed by the type
// word, so the field order and sizes must not change.
struct Projection
{
float rawProjection[6]; // raw coefficients; where each lands in the 4x4 matrix depends on type
u32 type; // only GX_PERSPECTIVE or GX_ORTHOGRAPHIC are allowed
};
struct XFRegisters
{
u32 error; // 0x1000
@ -257,10 +266,10 @@ struct XFRegisters
u32 unk5; // 0x1015
u32 unk6; // 0x1016
u32 unk7; // 0x1017
u32 MatrixIndexA; // 0x1018
u32 MatrixIndexB; // 0x1019
u32 MatrixIndexA; // 0x1018
u32 MatrixIndexB; // 0x1019
Viewport viewport; // 0x101a - 0x101f
float rawProjection[7]; // 0x1020 - 0x1026
Projection projection; // 0x1020 - 0x1026
u32 unk8[24]; // 0x1027 - 0x103e
NumTexGen numTexGen; // 0x103f
TexMtxInfo texMtxInfo[8]; // 0x1040 - 0x1047