diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index b3b8c07e18..bfe971f3ad 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -154,13 +154,13 @@ const float epsilon8bit = 1.0f / 255.0f; static const char *tevKSelTableC[] = // KCSEL { "1.0f,1.0f,1.0f", // 1 = 0x00 - "0.875,0.875,0.875", // 7_8 = 0x01 - "0.75,0.75,0.75", // 3_4 = 0x02 - "0.625,0.625,0.625", // 5_8 = 0x03 - "0.5,0.5,0.5", // 1_2 = 0x04 - "0.375,0.375,0.375", // 3_8 = 0x05 - "0.25,0.25,0.25", // 1_4 = 0x06 - "0.125,0.125,0.125", // 1_8 = 0x07 + "0.875f,0.875f,0.875f", // 7_8 = 0x01 + "0.75f,0.75f,0.75f", // 3_4 = 0x02 + "0.625f,0.625f,0.625f", // 5_8 = 0x03 + "0.5f,0.5f,0.5f", // 1_2 = 0x04 + "0.375f,0.375f,0.375f", // 3_8 = 0x05 + "0.25f,0.25f,0.25f", // 1_4 = 0x06 + "0.125f,0.125f,0.125f", // 1_8 = 0x07 "ERROR", // 0x08 "ERROR", // 0x09 "ERROR", // 0x0a @@ -266,7 +266,7 @@ static const char *tevCInputTable[] = // CC "rastemp.rgb", // RASC, "rastemp.aaa", // RASA, "float3(1.0f,1.0f,1.0f)", // ONE, - "float3(.5f,.5f,.5f)", // HALF, + "float3(0.5f,0.5f,0.5f)", // HALF, "konsttemp.rgb", // KONST, "float3(0.0f,0.0f,0.0f)", // ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", @@ -291,7 +291,7 @@ static const char *tevCInputTable2[] = // CC "rastemp", // RASC, "(rastemp.aaa)", // RASA, "float3(1.0f,1.0f,1.0f)", // ONE - "float3(.5f,.5f,.5f)", // HALF + "float3(0.5f,0.5f,0.5f)", // HALF "konsttemp", //"konsttemp.rgb", // KONST "float3(0.0f,0.0f,0.0f)", // ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", @@ -326,7 +326,7 @@ static const char *tevAInputTable2[] = // CA "textemp", // TEXA, "rastemp", // RASA, "konsttemp", // KONST, (hw1 had quarter) - "float4(0,0,0,0)", // ZERO + "float4(0.0,0.0,0.0,0.0)", // ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", @@ -342,7 +342,7 @@ static const char *tevRasTable[] = "ERROR", //4 "alphabump", // use bump alpha "(alphabump*(255.0f/248.0f))", //normalized - "float4(0,0,0,0)", // zero + "float4(0.0,0.0,0.0,0.0)", // zero }; static const char *alphaRef[2] = @@ -473,7 +473,7 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL char* pmainstart = p; WRITE(p, " float4 c0="I_COLORS"[1],c1="I_COLORS"[2],c2="I_COLORS"[3],prev=float4(0.0f,0.0f,0.0f,0.0f),textemp,rastemp,konsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n" - " float3 comp16 = float3(1,255,0), comp24 = float3(1,255,255*255);\n" + " float3 comp16 = float3(1.0f,255.0f,0.0f), comp24 = float3(1.0f,255.0f,255.0f*255.0f);\n" " float4 alphabump=0;\n" " float3 tevcoord;\n" " float2 wrappedcoord, tempcoord;\n\n"); @@ -533,9 +533,8 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL { // alpha test will always fail, so restart the shader and just make it an empty function p = pmainstart; - WRITE(p, HLSL ? "clip(-1);" : "discard;\n"); - //WRITE(p, "discard;\n"); WRITE(p, "ocol0 = 0;\n"); + WRITE(p, HLSL ? "clip(-1);" : "discard;\n"); } else { @@ -578,19 +577,28 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL) { // write the bump alpha if (bpmem.tevind[n].fmt == ITF_8) - WRITE(p, "alphabump = indtex%d.%s %s;\n", bpmem.tevind[n].bt, - tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); + WRITE(p, "alphabump = indtex%d.%s %s;\n", + bpmem.tevind[n].bt, + tevIndAlphaSel[bpmem.tevind[n].bs], + tevIndAlphaScale[bpmem.tevind[n].fmt]); else { // donkopunchstania: really bad way to do this // cannot always use fract because fract(1.0) is 0.0 when it needs to be 1.0 // omitting fract seems to work as well - WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt, - tevIndAlphaSel[bpmem.tevind[n].bs]); + WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs]); WRITE(p, " alphabump = 1.0f;\n"); WRITE(p, "else\n"); - WRITE(p, " alphabump = fract ( indtex%d.%s %s );\n", bpmem.tevind[n].bt, - tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); + WRITE(p, " alphabump = fract ( indtex%d.%s %s );\n", + bpmem.tevind[n].bt, + tevIndAlphaSel[bpmem.tevind[n].bs], + tevIndAlphaScale[bpmem.tevind[n].fmt]); + /*WRITE(p, " alphabump = indtex%d.%s %s;\n", + bpmem.tevind[n].bt, + tevIndAlphaSel[bpmem.tevind[n].bs], + tevIndAlphaScale[bpmem.tevind[n].fmt]); + WRITE(p, "if (alphabump > 1.0f ){ alphabump = fract ( alphabump );if (alphabump == 0.0f ) alphabump = 1.0f;}\n");*/ + } } @@ -669,7 +677,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL) SampleTexture(p, "textemp", "tevcoord", texswap, texmap, texture_mask, HLSL); } else - WRITE(p, "textemp=float4(1,1,1,1);\n"); + WRITE(p, "textemp=float4(1.0,1.0,1.0,1.0);\n"); int kc = bpmem.tevksel[n / 2].getKC(n & 1); int ka = bpmem.tevksel[n / 2].getKA(n & 1); @@ -720,23 +728,41 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL) case TEVCMP_R8_GT: case TEVCMP_RGB8_GT: // per component compares WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : float3(0.0f,0.0f,0.0f))", - tevCInputTable[cc.d], tevCInputTable2[cc.a], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable2[cc.b], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable[cc.c]); + tevCInputTable[cc.d], + tevCInputTable2[cc.a], + cmp==TEVCMP_R8_GT?"r":"rgb", + tevCInputTable2[cc.b], + cmp==TEVCMP_R8_GT?"r":"rgb", + tevCInputTable[cc.c]); break; case TEVCMP_R8_EQ: case TEVCMP_RGB8_EQ: WRITE(p, " %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f))", - tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], epsilon8bit, tevCInputTable[cc.c]); + tevCInputTable[cc.d], + tevCInputTable2[cc.a], + tevCInputTable2[cc.b], + epsilon8bit, + tevCInputTable[cc.c]); break; case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f))", - tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_GT?"16":"24", tevCInputTable[cc.c]); + tevCInputTable[cc.d], + tevCInputTable2[cc.a], + tevCInputTable2[cc.b], + cmp==TEVCMP_GR16_GT?"16":"24", + tevCInputTable[cc.c]); break; case TEVCMP_GR16_EQ: case TEVCMP_BGR24_EQ: WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f))", - tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_EQ?"16":"24", epsilon8bit, tevCInputTable[cc.c]); + tevCInputTable[cc.d], + tevCInputTable2[cc.a], + tevCInputTable2[cc.b], + cmp==TEVCMP_GR16_EQ?"16":"24", + epsilon8bit, + tevCInputTable[cc.c]); break; default: WRITE(p, "float3(0.0f,0.0f,0.0f)"); @@ -785,23 +811,41 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL) case TEVCMP_R8_GT: case TEVCMP_A8_GT: WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : 0)", - tevAInputTable[ac.d],tevAInputTable2[ac.a], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable2[ac.b], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable[ac.c]); + tevAInputTable[ac.d], + tevAInputTable2[ac.a], + cmp==TEVCMP_R8_GT?"r":"a", + tevAInputTable2[ac.b], + cmp==TEVCMP_R8_GT?"r":"a", + tevAInputTable[ac.c]); break; case TEVCMP_R8_EQ: case TEVCMP_A8_EQ: - WRITE(p, " %s + (abs(%s.r - %s.r)<%f ? %s : 0)", - tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],epsilon8bit,tevAInputTable[ac.c]); + WRITE(p, " %s + (abs(%s.r - %s.r)<= %f ? %s : 0)", + tevAInputTable[ac.d], + tevAInputTable2[ac.a], + tevAInputTable2[ac.b], + epsilon8bit, + tevAInputTable[ac.c]); break; case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)", - tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]); + tevAInputTable[ac.d], + tevAInputTable2[ac.a], + tevAInputTable2[ac.b], + cmp==TEVCMP_GR16_GT?"16":"24", + tevAInputTable[ac.c]); break; case TEVCMP_GR16_EQ: case TEVCMP_BGR24_EQ: - WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)", - tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_EQ?"16":"24",epsilon8bit,tevAInputTable[ac.c]); + WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<=%f ? %s : 0)", + tevAInputTable[ac.d], + tevAInputTable2[ac.a], + tevAInputTable2[ac.b], + cmp==TEVCMP_GR16_EQ?"16":"24", + epsilon8bit, + tevAInputTable[ac.c]); break; default: WRITE(p, "0)"); @@ -851,21 +895,37 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con } } -static void WriteAlphaCompare(char *&p, int num, int comp) +static const char *tevAlphaFuncsTable[] = { - switch(comp) - { - case ALPHACMP_ALWAYS: WRITE(p, "(false)"); break; - case ALPHACMP_NEVER: WRITE(p, "(true)"); break; - case ALPHACMP_LEQUAL: WRITE(p, "(prev.a > %s)",alphaRef[num]); break; - case ALPHACMP_LESS: WRITE(p, "(prev.a >= %s - %f)",alphaRef[num],epsilon8bit*0.5f);break; - case ALPHACMP_GEQUAL: WRITE(p, "(prev.a < %s)",alphaRef[num]); break; - case ALPHACMP_GREATER: WRITE(p, "(prev.a <= %s + %f)",alphaRef[num],epsilon8bit*0.5f);break; - case ALPHACMP_EQUAL: WRITE(p, "(abs(prev.a-%s)>%f)",alphaRef[num],epsilon8bit*2); break; - case ALPHACMP_NEQUAL: WRITE(p, "(abs(prev.a-%s)<%f)",alphaRef[num],epsilon8bit*2); break; - default: PanicAlert("Bad Alpha Compare! %08x", comp); - } -} + "(false)", //ALPHACMP_NEVER 0 + "(prev.a < %s + %f)", //ALPHACMP_LESS 1 + "(abs( prev.a - %s ) <= %f)", //ALPHACMP_EQUAL 2 + "(prev.a <= %s + %f)", //ALPHACMP_LEQUAL 3 + "(prev.a > %s - %f)", //ALPHACMP_GREATER 4 + "(abs( prev.a - %s ) > %f)", //ALPHACMP_NEQUAL 5 + "(prev.a >= %s - %f)", //ALPHACMP_GEQUAL 6 + "(true)" //ALPHACMP_ALWAYS 7 +}; + +static const float tevAlphaDeltas[] = +{ + 0.0f, //ALPHACMP_NEVER 0 + epsilon8bit*0.5f, //ALPHACMP_LESS 1 + epsilon8bit, //ALPHACMP_EQUAL 2 + epsilon8bit*0.5f, //ALPHACMP_LEQUAL 3 + epsilon8bit*0.5f, //ALPHACMP_GREATER 4 + epsilon8bit, //ALPHACMP_NEQUAL 5 + epsilon8bit*0.5f, //ALPHACMP_GEQUAL 6 + 0.0f //ALPHACMP_ALWAYS 7 +}; + +static const char *tevAlphaFunclogicTable[] = +{ + " && ", // and + " || ", // or + " != ", // xor + " == " // xnor +}; static bool WriteAlphaTest(char *&p, bool HLSL) { @@ -876,38 +936,22 @@ static bool WriteAlphaTest(char *&p, bool HLSL) switch(op) { case 0: // AND - if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) - return true; - if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) - { - WRITE(p, HLSL ? "clip(-1);" : "discard;\n"); - return false; - } + if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true; + if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) return false; break; case 1: // OR - if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) - return true; - if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) - { - WRITE(p, HLSL ? "clip(-1);" : "discard;\n"); - return false; - } + if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true; + if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)return false; break; case 2: // XOR if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS)) return true; if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)) - { - WRITE(p, HLSL ? "clip(-1);" : "discard;\n"); - return false; - } + return false; break; case 3: // XNOR if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS)) - { - WRITE(p, HLSL ? "clip(-1);" : "discard;\n"); - return false; - } + return false; if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)) return true; break; @@ -918,78 +962,67 @@ static bool WriteAlphaTest(char *&p, bool HLSL) if (HLSL) WRITE(p, "clip( "); else - WRITE(p, "discard( "); + WRITE(p, "discard(!( "); - WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0); + int compindex = bpmem.alphaFunc.comp0 % 8; + WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0],tevAlphaDeltas[compindex]); - // negated because testing the inverse condition - switch (bpmem.alphaFunc.logic) - { - case 0: WRITE(p, " || "); break; // and - case 1: WRITE(p, " && "); break; // or - case 2: WRITE(p, " == "); break; // xor - case 3: WRITE(p, " != "); break; // xnor - default: break; - } - - WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1); + WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]); + + compindex = bpmem.alphaFunc.comp1 % 8; + WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1],tevAlphaDeltas[compindex]); if (HLSL) { // clip works differently than discard - discard takes a bool, clip takes a value that kills the pixel on negative - WRITE(p, " ? -1 : 1);\n"); + WRITE(p, " ? 1 : -1);\n"); } else { - WRITE(p, ");\n"); + WRITE(p, "));\n"); } - return true; } +static const char *tevFogFuncsTable[] = +{ + "", //No Fog + "", //? + "", //Linear + "", //? + " fog = 1.0f - pow(2, -8.0f * fog);\n", //exp + " fog = 1.0f - pow(2, -8.0f * fog * fog);\n", //exp2 + " fog = pow(2, -8.0f * (1.0f - fog));\n", //backward exp + " fog = 1.0f - fog;\n fog = pow(2, -8.0f * fog * fog);\n" //backward exp2 +}; + static void WriteFog(char *&p) { - bool enabled = bpmem.fog.c_proj_fsel.fsel == 0 ? false : true; + if(bpmem.fog.c_proj_fsel.fsel == 0)return;//no Fog - if (enabled) + if (bpmem.fog.c_proj_fsel.proj == 0) { - if (bpmem.fog.c_proj_fsel.proj == 0) - { - // perspective - // ze = A/(B - Zs) - WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - depth);\n"); - } - else - { - // orthographic - // ze = a*Zs - WRITE (p, " float ze = "I_FOG"[1].x * depth;\n"); - } - - //WRITE (p, " float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n"); - WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n"); - - switch (bpmem.fog.c_proj_fsel.fsel) - { - case 0: // TODO - No fog? - break; - case 2: // linear - // empty - break; - case 4: // exp - WRITE(p, " fog = 1.0f - pow(2, -8.0f * fog);\n"); - break; - case 5: // exp2 - WRITE(p, " fog = 1.0f - pow(2, -8.0f * fog * fog);\n"); - break; - case 6: // backward exp - WRITE(p, " fog = 1.0f - fog;\n"); - WRITE(p, " fog = pow(2, -8.0f * fog);\n"); - break; - case 7: // backward exp2 - WRITE(p, " fog = 1.0f - fog;\n"); - WRITE(p, " fog = pow(2, -8.0f * fog * fog);\n"); - break; - default: WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); - } - - WRITE(p, " prev.rgb = (1.0f - fog) * prev.rgb + (fog * "I_FOG"[0].rgb);\n"); + // perspective + // ze = A/(B - Zs) + WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - depth);\n"); + } + else + { + // orthographic + // ze = a*Zs + WRITE (p, " float ze = "I_FOG"[1].x * depth;\n"); } + + //WRITE (p, " float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n"); + WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n"); + + if(bpmem.fog.c_proj_fsel.fsel > 3) + { + WRITE(p, tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); + } + else + { + if(bpmem.fog.c_proj_fsel.fsel != 2) + WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); + } + + WRITE(p, " prev.rgb = (1.0f - fog) * prev.rgb + (fog * "I_FOG"[0].rgb);\n"); + } diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index 1263f52cfa..716a4552d6 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -30,6 +30,7 @@ #define WRITE p+=sprintf static char text[16384]; +static bool IntensityConstantAdded = false; namespace TextureConversionShader { @@ -157,7 +158,13 @@ void WriteSampleColor(char*& p, const char* colorComp, const char* dest) void WriteColorToIntensity(char*& p, const char* src, const char* dest) { - WRITE(p, " %s = (0.257f * %s.r) + (0.504f * %s.g) + (0.098f * %s.b) + 0.0625f;\n", dest, src, src, src); + if(!IntensityConstantAdded) + { + WRITE(p, " float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n"); + IntensityConstantAdded = true; + } + //WRITE(p, " %s = (0.257f * %s.r) + (0.504f * %s.g) + (0.098f * %s.b) + 0.0625f;\n", dest, src, src, src); + WRITE(p, " %s = dot(IntensityConst.rgb, %s.rgb) + IntensityConst.a;\n", dest, src); } void WriteIncrementSampleX(char*& p) @@ -171,6 +178,12 @@ void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest) WRITE(p, " %s = floor(%s * %ff);\n", dest, src, result); } +void WriteEncoderEnd(char* p) +{ + WRITE(p, "}\n"); + IntensityConstantAdded = false; +} + void WriteI8Encoder(char* p) { WriteSwizzler(p, GX_TF_I8); @@ -191,7 +204,7 @@ void WriteI8Encoder(char* p) WriteSampleColor(p, "rgb", "texSample"); WriteColorToIntensity(p, "texSample", "ocol0.a"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteI4Encoder(char* p) @@ -236,7 +249,7 @@ void WriteI4Encoder(char* p) WriteToBitDepth(p, 4, "color1", "color1"); WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteIA8Encoder(char* p) @@ -253,7 +266,7 @@ void WriteIA8Encoder(char* p) WRITE(p, " ocol0.r = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "ocol0.a"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteIA4Encoder(char* p) @@ -286,7 +299,7 @@ void WriteIA4Encoder(char* p) WriteToBitDepth(p, 4, "color1", "color1"); WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteRGB565Encoder(char* p) @@ -321,7 +334,7 @@ void WriteRGB565Encoder(char* p) WRITE(p, " ocol0.a = ocol0.a + gLower * 32.0f;\n"); WRITE(p, " ocol0 = ocol0 / 255.0f;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteRGB5A3Encoder(char* p) @@ -388,7 +401,7 @@ void WriteRGB5A3Encoder(char* p) WRITE(p, "}\n"); WRITE(p, " ocol0 = ocol0 / 255.0f;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteRGBA4443Encoder(char* p) @@ -414,7 +427,7 @@ void WriteRGBA4443Encoder(char* p) WriteToBitDepth(p, 4, "texSample.b", "color1.a"); WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteRGBA8Encoder(char* p) @@ -444,7 +457,7 @@ void WriteRGBA8Encoder(char* p) WRITE(p, " ocol0 = (cl0 * color0) + (cl1 * color1);\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteC4Encoder(char* p, const char* comp) @@ -480,7 +493,7 @@ void WriteC4Encoder(char* p, const char* comp) WriteToBitDepth(p, 4, "color1", "color1"); WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteC8Encoder(char* p, const char* comp) @@ -498,7 +511,7 @@ void WriteC8Encoder(char* p, const char* comp) WriteSampleColor(p, comp, "ocol0.a"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteCC4Encoder(char* p, const char* comp) @@ -531,7 +544,7 @@ void WriteCC4Encoder(char* p, const char* comp) WriteToBitDepth(p, 4, "color1", "color1"); WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteCC8Encoder(char* p, const char* comp) @@ -543,7 +556,7 @@ void WriteCC8Encoder(char* p, const char* comp) WriteSampleColor(p, comp, "ocol0.ra"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteZ8Encoder(char* p, const char* multiplier) @@ -567,7 +580,7 @@ void WriteZ8Encoder(char* p, const char* multiplier) WriteSampleColor(p, "b", "depth"); WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteZ16Encoder(char* p) @@ -588,7 +601,7 @@ void WriteZ16Encoder(char* p) WRITE(p, " ocol0.r = frac(depth * 256.0f);\n"); WRITE(p, " ocol0.a = depth;\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteZ16LEncoder(char* p) @@ -609,7 +622,7 @@ void WriteZ16LEncoder(char* p) WRITE(p, " ocol0.r = frac(depth * 65536.0f);\n"); WRITE(p, " ocol0.a = frac(depth * 256.0f);\n"); - WRITE(p, "}\n"); + WriteEncoderEnd(p); } void WriteZ24Encoder(char* p) @@ -637,8 +650,8 @@ void WriteZ24Encoder(char* p) WRITE(p, " ocol0.g = frac(depth0 * 65536.0f);\n"); WRITE(p, " ocol0.r = 1.0f;\n"); WRITE(p, " ocol0.a = frac(depth0 * 65536.0f);\n"); - WRITE(p, " }\n" - "}\n"); + WRITE(p, " }\n"); + WriteEncoderEnd(p); } const char *GenerateEncodingShader(u32 format) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp index 544901170f..f9d73bc169 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp @@ -40,7 +40,7 @@ static LPDIRECT3DSURFACE9 s_efb_depth_OffScreenReadBuffer; static D3DFORMAT s_efb_color_surface_Format; static D3DFORMAT s_efb_depth_surface_Format; #undef CHECK -#define CHECK(hr,Message) //if (FAILED(hr)) { PanicAlert(__FUNCTION__ " FAIL: %s" ,Message); } +#define CHECK(hr,Message) if (FAILED(hr)) { PanicAlert(__FUNCTION__ " FAIL: %s" ,Message); } @@ -102,87 +102,11 @@ void Create() if (g_ActiveConfig.bEFBAccessEnable) { //depth format in prefered order - D3DFORMAT *DepthTexFormats = new D3DFORMAT[7]; - DepthTexFormats[0] = (D3DFORMAT)MAKEFOURCC('D','F','2','4'); - DepthTexFormats[1] = (D3DFORMAT)MAKEFOURCC('I','N','T','Z'); - DepthTexFormats[2] = (D3DFORMAT)MAKEFOURCC('R','A','W','Z'); - DepthTexFormats[3] = (D3DFORMAT)MAKEFOURCC('D','F','1','6'); - DepthTexFormats[4] = D3DFMT_D32F_LOCKABLE; - DepthTexFormats[5] = D3DFMT_D16_LOCKABLE; - DepthTexFormats[6] = D3DFMT_D24X8; - - for (int i = 0;i<4;i++) - { - s_efb_depth_surface_Format = DepthTexFormats[i]; - hr = D3D::dev->CreateTexture(target_width, target_height, 1, D3DUSAGE_DEPTHSTENCIL, s_efb_depth_surface_Format, - D3DPOOL_DEFAULT, &s_efb_depth_texture, NULL); - if (!FAILED(hr)) - break; - - } - CHECK(hr,"Create Depth Texture"); - if (!FAILED(hr)) - { - //we found a dept texture suported by hardware so get the surface to draw to - hr = s_efb_depth_texture->GetSurfaceLevel(0, &s_efb_depth_surface); - CHECK(hr,"Get Depth Surface"); - //create a buffer texture for peeking - hr = D3D::dev->CreateTexture(1, 1, 1, D3DUSAGE_DEPTHSTENCIL, s_efb_depth_surface_Format, - D3DPOOL_DEFAULT, &s_efb_depthBuffer_texture, NULL); - CHECK(hr,"Create Depth Pixel Texture"); - if (!FAILED(hr)) - { - //texture create correctly so get the surface - hr = s_efb_depthBuffer_texture->GetSurfaceLevel(0, &s_efb_depth_ReadBuffer); - CHECK(hr,"Get Depth Pixel Surface"); - // create an ofscren surface to grab the data - hr = D3D::dev->CreateOffscreenPlainSurface(1, 1, s_efb_depth_surface_Format, D3DPOOL_SYSTEMMEM, &s_efb_depth_OffScreenReadBuffer, NULL ); - CHECK(hr,"Create Depth offscreen Surface"); - if (FAILED(hr)) - { - //no depth in system mem so try vista path to grab depth data - //create a offscreen lockeable surface - hr = D3D::dev->CreateOffscreenPlainSurface(1, 1, D3DFMT_D32F_LOCKABLE, D3DPOOL_DEFAULT, &s_efb_depth_OffScreenReadBuffer, NULL ); - CHECK(hr, "Create Depth D3DFMT_D32F_LOCKABLE offscreen Surface"); - if (s_efb_depth_ReadBuffer) - s_efb_depth_ReadBuffer->Release(); - //this is ugly but is a fast way to test wich path to proceed for peeking - s_efb_depth_ReadBuffer = s_efb_depth_OffScreenReadBuffer; - s_efb_depth_surface_Format = D3DFMT_D32F_LOCKABLE; - } - } - - } - if (!FAILED(hr)) - { - //so far so god, texture depth works so return - delete [] DepthTexFormats; - return; - } - else - { - //no depth texture... cleanup - if(s_efb_depth_ReadBuffer) - s_efb_depth_ReadBuffer->Release(); - s_efb_depth_ReadBuffer = NULL; - - if(s_efb_depth_OffScreenReadBuffer) - s_efb_depth_OffScreenReadBuffer->Release(); - - if(s_efb_depth_surface) - s_efb_depth_surface->Release(); - s_efb_depth_surface = NULL; - - if(s_efb_depthBuffer_texture) - s_efb_depthBuffer_texture->Release(); - s_efb_depthBuffer_texture=NULL; - - if(s_efb_depth_texture) - s_efb_depth_texture->Release(); - s_efb_depth_texture = NULL; - } - // no depth textures... try to create an lockable depth surface - for(int i = 4;i<7;i++) + D3DFORMAT *DepthTexFormats = new D3DFORMAT[3]; + DepthTexFormats[0] = D3DFMT_D32F_LOCKABLE; + DepthTexFormats[1] = D3DFMT_D16_LOCKABLE; + DepthTexFormats[2] = D3DFMT_D24X8; + for(int i = 0;i<3;i++) { s_efb_depth_surface_Format = DepthTexFormats[i]; hr = D3D::dev->CreateDepthStencilSurface(target_width, target_height, s_efb_depth_surface_Format, @@ -190,6 +114,7 @@ void Create() if (!FAILED(hr)) break; } s_efb_depth_ReadBuffer = s_efb_depth_surface; + s_efb_depth_OffScreenReadBuffer = s_efb_depth_surface; CHECK(hr,"CreateDepthStencilSurface"); delete [] DepthTexFormats; } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index b68522f936..c886677809 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -534,26 +534,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) RectToLock.left = 0; RectToLock.right = 1; RectToLock.top = 0; - } - else - { - //like i say in FramebufferManager this is ugly... using the pointers to decide the peek path.. but it works:) - if(BufferFormat == D3DFMT_D32F_LOCKABLE && RBuffer == pOffScreenBuffer) - { - //we are using vista path so use updateSurface to copy depth data - hr = D3D::dev->UpdateSurface(pBuffer,&RectToLock,pOffScreenBuffer,NULL); - if(FAILED(hr)) - { - PanicAlert("Unable to update data to mem buffer"); - return 0; - } - } - else - { - //we are using lockable depth buffer so change the pointer to lock it directly - pOffScreenBuffer = pBuffer; - } - } + } //the surface is good.. lock it if((hr = pOffScreenBuffer->LockRect(&drect, &RectToLock, D3DLOCK_READONLY)) != D3D_OK) { @@ -571,7 +552,6 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) val = ((float *)drect.pBits)[0]; z = ((u32)(val * 0xffffff));// 0xFFFFFFFF; break; - case (D3DFORMAT)MAKEFOURCC('D','F','1','6'): case D3DFMT_D16_LOCKABLE: val = ((float)((u16 *)drect.pBits)[0])/((float)0xFFFF); z = ((u32)(val * 0xffffff)); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index bed5cc0fc4..4ac71d86f6 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -286,8 +286,6 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr glGenProgramsARB(1, &ps.glprogid); EnableShader(ps.glprogid); - //glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid); - //CurrentShader = ps.glprogid; glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); err = GL_REPORT_ERROR(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 0e854544a5..e2036aeaec 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -144,8 +144,14 @@ static const GLenum glSrcFactors[8] = }; static const GLenum glDestFactors[8] = { - GL_ZERO, GL_ONE, GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, - GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA }; void SetDefaultRectTexParams() @@ -538,14 +544,9 @@ void Renderer::RestoreAPIState() void Renderer::SetColorMask() { - if (bpmem.blendmode.alphaupdate && bpmem.blendmode.colorupdate) - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - else if (bpmem.blendmode.alphaupdate) - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE); - else if (bpmem.blendmode.colorupdate) - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_FALSE); - else - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + GLenum ColorMask = (bpmem.blendmode.colorupdate) ? GL_TRUE : GL_FALSE; + GLenum AlphaMask = (bpmem.blendmode.alphaupdate) ? GL_TRUE : GL_FALSE; + glColorMask(ColorMask, ColorMask, ColorMask, AlphaMask); } void Renderer::SetBlendMode(bool forceUpdate) @@ -1001,14 +1002,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) } // --------------------------------------------------------------------- GL_REPORT_ERRORD(); - - /*for (int i = 0; i < 8; i++) { - glActiveTexture(GL_TEXTURE0 + i); - glDisable(GL_TEXTURE_2D); - glDisable(GL_TEXTURE_RECTANGLE_ARB); - } - glActiveTexture(GL_TEXTURE0);*/ - + DrawDebugText(); GL_REPORT_ERRORD(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index 9d07d36b28..6b5ac73de0 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -280,7 +280,7 @@ void VertexShaderCache::DisableShader() void VertexShaderCache::SetCurrentShader(GLuint Shader) { - if(ShaderEnabled && CurrentShader != Shader) + if(ShaderEnabled /*&& CurrentShader != Shader*/) { CurrentShader = Shader; glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader);