diff --git a/Source/Core/VideoCommon/Src/Debugger.cpp b/Source/Core/VideoCommon/Src/Debugger.cpp index 005f81c8e6..e0024db655 100644 --- a/Source/Core/VideoCommon/Src/Debugger.cpp +++ b/Source/Core/VideoCommon/Src/Debugger.cpp @@ -103,21 +103,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path) if (!useDstAlpha) { output = "Destination alpha disabled:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } else { if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend) { output = "Using dual source blending for destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } else { output = "Using two passes for emulating destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); output += "\n\nDestination alpha pass shader:\n"; - output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 448501aad0..5ee95848b5 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -28,241 +28,6 @@ #include "NativeVertexFormat.h" -static void StageHash(u32 stage, u32* out) -{ - out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24 - u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now - out[0] |= (alphaC&0xF0) << 24; // 8 - out[1] |= alphaC >> 8; // 16 - - // reserve 3 bits for bpmem.tevorders[stage/2].getTexMap - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3 - out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1 - // reserve 3 bits for bpmem.tevorders[stage/2].getColorChan - - bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages; - out[2] |= bHasIndStage << 2; // 1 - - bool needstexcoord = false; - - if (bHasIndStage) - { - out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation - needstexcoord = true; - } - - - TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC; - TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC; - - if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC - || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC - || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC - || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC - || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA - || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) - { - out[0] |= bpmem.combiners[stage].alphaC.rswap; - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2 - out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23; - out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1; - } - - out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1); - if (bpmem.tevorders[stage/2].getEnable(stage&1)) - { - if (bHasIndStage) needstexcoord = true; - - out[0] |= bpmem.combiners[stage].alphaC.tswap; - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2 - out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16; - } - - if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST - || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) - { - out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5 - out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5 - } - - if (needstexcoord) - { - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16; - } -} - -// Mash together all the inputs that contribute to the code of a generated pixel shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... -// It would likely be a lot more efficient to build this incrementally as the attributes -// are set... -void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - memset(uid->values, 0, sizeof(uid->values)); - uid->values[0] |= bpmem.genMode.numtevstages; // 4 - uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 - uid->values[0] |= dstAlphaMode << 8; // 2 - - bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - uid->values[0] |= enablePL << 10; // 1 - - if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 11; // 4 - - AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult(); - uid->values[0] |= alphaPreTest << 15; // 2 - - // numtexgens should be <= 8 - for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i) - uid->values[0] |= xfregs.texMtxInfo[i].projection << (17+i); // 1 - - uid->values[1] = bpmem.genMode.numindstages; // 3 - u32 indirectStagesUsed = 0; - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - indirectStagesUsed |= (1 << bpmem.tevind[i].bt); - - assert(indirectStagesUsed == (indirectStagesUsed & 0xF)); - - uid->values[1] |= indirectStagesUsed << 3; // 4; - - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - { - if (indirectStagesUsed & (1 << i)) - { - uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (7 + 3*i); // 1 - if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) - uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (8 + 3*i); // 2 - } - } - - u32* ptr = &uid->values[2]; - for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i) - { - StageHash(i, ptr); - ptr += 4; // max: ptr = &uid->values[66] - } - - ptr[0] |= bpmem.alpha_test.comp0; // 3 - ptr[0] |= bpmem.alpha_test.comp1 << 3; // 3 - ptr[0] |= bpmem.alpha_test.logic << 6; // 2 - - ptr[0] |= bpmem.ztex2.op << 8; // 2 - ptr[0] |= bpmem.zcontrol.early_ztest << 10; // 1 - ptr[0] |= bpmem.zmode.testenable << 11; // 1 - ptr[0] |= bpmem.zmode.updateenable << 12; // 1 - - if (dstAlphaMode != DSTALPHA_ALPHA_PASS) - { - ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 13; // 3 - if (bpmem.fog.c_proj_fsel.fsel != 0) - { - ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1 - ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1 - } - } - - ++ptr; - if (enablePL) - { - ptr += GetLightingShaderId(ptr); - *ptr++ = components; - } - - uid->num_values = ptr - uid->values; -} - -void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - memset(uid->values, 0, sizeof(uid->values)); - u32* ptr = uid->values; - *ptr++ = dstAlphaMode; // 0 - *ptr++ = bpmem.genMode.hex; // 1 - *ptr++ = bpmem.ztex2.hex; // 2 - *ptr++ = bpmem.zcontrol.hex; // 3 - *ptr++ = bpmem.zmode.hex; // 4 - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 5 - *ptr++ = xfregs.numTexGen.hex; // 6 - - if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - *ptr++ = xfregs.color[0].hex; - *ptr++ = xfregs.alpha[0].hex; - *ptr++ = xfregs.color[1].hex; - *ptr++ = xfregs.alpha[1].hex; - *ptr++ = components; - } - - for (unsigned int i = 0; i < 8; ++i) - *ptr++ = xfregs.texMtxInfo[i].hex; // 7-14 - - for (unsigned int i = 0; i < 16; ++i) - *ptr++ = bpmem.tevind[i].hex; // 15-30 - - *ptr++ = bpmem.tevindref.hex; // 31 - - for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i) // up to 16 times - { - *ptr++ = bpmem.combiners[i].colorC.hex; // 32+5*i - *ptr++ = bpmem.combiners[i].alphaC.hex; // 33+5*i - *ptr++ = bpmem.tevind[i].hex; // 34+5*i - *ptr++ = bpmem.tevksel[i/2].hex; // 35+5*i - *ptr++ = bpmem.tevorders[i/2].hex; // 36+5*i - } - - ptr = &uid->values[112]; - - *ptr++ = bpmem.alpha_test.hex; // 112 - - *ptr++ = bpmem.fog.c_proj_fsel.hex; // 113 - *ptr++ = bpmem.fogRange.Base.hex; // 114 - - _assert_((ptr - uid->values) == uid->GetNumValues()); -} - -void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - if (!g_ActiveConfig.bEnableShaderDebugging) - return; - - PIXELSHADERUIDSAFE new_id; - GetSafePixelShaderId(&new_id, dstAlphaMode, components); - - if (!(old_id == new_id)) - { - std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components)); - if (old_code != new_code) - { - _assert_(old_id.GetNumValues() == new_id.GetNumValues()); - - char msg[8192]; - char* ptr = msg; - ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); - const int N = new_id.GetNumValues(); - for (int i = 0; i < N/2; ++i) - ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], - new_id.values[2*i], new_id.values[2*i+1]); - if (N % 2) - ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); - - static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file(szTemp); - file << msg; - file << "\n\nOld shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code; - file.close(); - - PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); - } - } -} - // old tev->pixelshader notes // // color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 @@ -272,11 +37,11 @@ void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std:: // output is given by .outreg // tevtemp is set according to swapmodetables and -static void WriteStage(char *&p, int n, API_TYPE ApiType); -static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); +template static void WriteStage(char *&p, int n, API_TYPE ApiType); +template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); // static void WriteAlphaCompare(char *&p, int num, int comp); -static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); -static void WriteFog(char *&p); +template static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); +template static void WriteFog(char *&p); static const char *tevKSelTableC[] = // KCSEL { @@ -456,8 +221,6 @@ static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" }; static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" }; -#define WRITE p+=sprintf - static char swapModeTable[4][5]; static char text[16384]; @@ -484,105 +247,109 @@ static void BuildSwapModeTable() } } -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +template +void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - text[sizeof(text) - 1] = 0x7C; // canary +#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; + if (type == GO_ShaderCode) + { + setlocale(LC_NUMERIC, "C"); // Reset locale for compilation + out.SetBuffer(text); + } +/// text[sizeof(text) - 1] = 0x7C; // canary + /// TODO: Uids! BuildSwapModeTable(); // Needed for WriteStage - int numStages = bpmem.genMode.numtevstages + 1; - int numTexgen = bpmem.genMode.numtexgens; + unsigned int numStages = bpmem.genMode.numtevstages + 1; + unsigned int numTexgen = bpmem.genMode.numtexgens; char *p = text; - WRITE(p, "//Pixel Shader for TEV stages\n"); - WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n", + out.Write("//Pixel Shader for TEV stages\n"); + out.Write("//%i TEV stages, %i texgens, XXX IND stages\n", numStages, numTexgen/*, bpmem.genMode.numindstages*/); - int nIndirectStagesUsed = 0; - if (bpmem.genMode.numindstages > 0) - { - for (int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } - // Declare samplers + SetUidField(components, components); + SetUidField(dstAlphaMode, dstAlphaMode); + SetUidField(genMode.numindstages, bpmem.genMode.numindstages); + SetUidField(genMode.numtevstages, bpmem.genMode.numtevstages); + SetUidField(genMode.numtexgens, bpmem.genMode.numtexgens); + + // Declare samplers if(ApiType != API_D3D11) { - WRITE(p, "uniform sampler2D "); + out.Write("uniform sampler2D "); } else { - WRITE(p, "sampler "); + out.Write("sampler "); } bool bfirst = true; for (int i = 0; i < 8; ++i) { - WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",", i, i); + out.Write("%s samp%d : register(s%d)", bfirst?"":",", i, i); bfirst = false; } - WRITE(p, ";\n"); + out.Write(";\n"); if(ApiType == API_D3D11) { - WRITE(p, "Texture2D "); + out.Write("Texture2D "); bfirst = true; for (int i = 0; i < 8; ++i) { - WRITE(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i); + out.Write("%s Tex%d : register(t%d)", bfirst?"":",", i, i); bfirst = false; } - WRITE(p, ";\n"); + out.Write(";\n"); } - WRITE(p, "\n"); + out.Write("\n"); - WRITE(p, "uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); - WRITE(p, "uniform float4 " I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); - WRITE(p, "uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); - WRITE(p, "uniform float4 " I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); - WRITE(p, "uniform float4 " I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); - WRITE(p, "uniform float4 " I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); - WRITE(p, "uniform float4 " I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); - WRITE(p, "uniform float4 " I_FOG"[3] : register(c%d);\n", C_FOG); + out.Write("uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); + out.Write("uniform float4 " I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); + out.Write("uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); + out.Write("uniform float4 " I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); + out.Write("uniform float4 " I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); + out.Write("uniform float4 " I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); + out.Write("uniform float4 " I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); + out.Write("uniform float4 " I_FOG"[3] : register(c%d);\n", C_FOG); if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { - WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n"); - WRITE(p,"typedef struct { Light lights[8]; } s_" I_PLIGHTS";\n"); - WRITE(p, "uniform s_" I_PLIGHTS" " I_PLIGHTS" : register(c%d);\n", C_PLIGHTS); - WRITE(p, "typedef struct { float4 C0, C1, C2, C3; } s_" I_PMATERIALS";\n"); - WRITE(p, "uniform s_" I_PMATERIALS" " I_PMATERIALS" : register(c%d);\n", C_PMATERIALS); + out.Write("typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n"); + out.Write("typedef struct { Light lights[8]; } s_" I_PLIGHTS";\n"); + out.Write("uniform s_" I_PLIGHTS" " I_PLIGHTS" : register(c%d);\n", C_PLIGHTS); + out.Write("typedef struct { float4 C0, C1, C2, C3; } s_" I_PMATERIALS";\n"); + out.Write("uniform s_" I_PMATERIALS" " I_PMATERIALS" : register(c%d);\n", C_PMATERIALS); } - WRITE(p, "void main(\n"); + out.Write("void main(\n"); if(ApiType != API_D3D11) { - WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", + out.Write(" out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "", "\n out float depth : DEPTH,", ApiType & API_OPENGL ? "WPOS" : ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS"); } else { - WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", + out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", "\n out float depth : SV_Depth,"); } - WRITE(p, " in float4 colors_0 : COLOR0,\n"); - WRITE(p, " in float4 colors_1 : COLOR1"); + out.Write(" in float4 colors_0 : COLOR0,\n"); + out.Write(" in float4 colors_1 : COLOR1"); // compute window position if needed because binding semantic WPOS is not widely supported if (numTexgen < 7) { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, ",\n in float3 uv%d : TEXCOORD%d", i, i); - WRITE(p, ",\n in float4 clipPos : TEXCOORD%d", numTexgen); + for (unsigned int i = 0; i < numTexgen; ++i) + out.Write(",\n in float3 uv%d : TEXCOORD%d", i, i); + out.Write(",\n in float4 clipPos : TEXCOORD%d", numTexgen); if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, ",\n in float4 Normal : TEXCOORD%d", numTexgen + 1); + out.Write(",\n in float4 Normal : TEXCOORD%d", numTexgen + 1); } else { @@ -590,17 +357,18 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { for (int i = 0; i < 8; ++i) - WRITE(p, ",\n in float4 uv%d : TEXCOORD%d", i, i); + out.Write(",\n in float4 uv%d : TEXCOORD%d", i, i); } else { + /// TODO: Set numTexGen used for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); + out.Write(",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); } } - WRITE(p, " ) {\n"); + out.Write(" ) {\n"); - WRITE(p, " float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" + out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" " float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n" " float4 alphabump=float4(0.0f,0.0f,0.0f,0.0f);\n" " float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n" @@ -613,63 +381,96 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType { if (xfregs.numTexGen.numTexGens < 7) { - WRITE(p,"float3 _norm0 = normalize(Normal.xyz);\n\n"); - WRITE(p,"float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); + out.Write("float3 _norm0 = normalize(Normal.xyz);\n\n"); + out.Write("float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); } else { - WRITE(p," float3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); - WRITE(p,"float3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); + out.Write(" float3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); + out.Write("float3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); } - WRITE(p, "float4 mat, lacc;\n" + out.Write("float4 mat, lacc;\n" "float3 ldir, h;\n" "float dist, dist2, attn;\n"); - p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); +/// p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } if (numTexgen < 7) - WRITE(p, "clipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); + out.Write("clipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); else - WRITE(p, "float4 clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); + out.Write("float4 clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); // HACK to handle cases where the tex gen is not enabled if (numTexgen == 0) { - WRITE(p, "float3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); + out.Write("float3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); } else { - for (int i = 0; i < numTexgen; ++i) + for (unsigned int i = 0; i < numTexgen; ++i) { // optional perspective divides + SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) { - WRITE(p, "if (uv%d.z)", i); - WRITE(p, " uv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); + out.Write("if (uv%d.z)", i); + out.Write(" uv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); } - WRITE(p, "uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i); + out.Write("uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i); } } // indirect texture map lookup + int nIndirectStagesUsed = 0; + if (bpmem.genMode.numindstages > 0) + { + for (unsigned int i = 0; i < numStages; ++i) + { + /// Ignoring this for now, handled in WriteStage. + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + SetUidField(nIndirectStagesUsed, nIndirectStagesUsed); for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { if (nIndirectStagesUsed & (1<(out, buffer, "tempcoord", "abg", texmap, ApiType); } } @@ -683,8 +484,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType RegisterStates[i].AuxStored = false; } - for (int i = 0; i < numStages; i++) - WriteStage(p, i, ApiType); //build the equation for this stage + for (unsigned int i = 0; i < numStages; i++) + WriteStage(out, i, ApiType); //build the equation for this stage if(numStages) { @@ -692,48 +493,49 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType // regardless of the used destination register if(bpmem.combiners[numStages - 1].colorC.dest != 0) { +/// SetUidField(combiners[numStages-1].colorC.dest, bpmem.combiners[numStages-1].colorC.dest); bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored; - WRITE(p, "prev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); + out.Write("prev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl; } if(bpmem.combiners[numStages - 1].alphaC.dest != 0) { bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored; - WRITE(p, "prev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); + out.Write("prev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl; } } // emulation of unsigned 8 overflow when casting if needed if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) - WRITE(p, "prev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("prev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); if (Pretest == AlphaTest::UNDETERMINED) - WriteAlphaTest(p, ApiType, dstAlphaMode); + WriteAlphaTest(out, ApiType, dstAlphaMode); // the screen space depth value = far z + (clip z / clip w) * z range - WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); + out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); // Note: depth textures are disabled if early depth test is enabled if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... - WRITE(p, "zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", + out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); // scale to make result from frac correct - WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n"); - WRITE(p, "zCoord = frac(zCoord);\n"); - WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n"); + out.Write("zCoord = zCoord * (16777215.0f/16777216.0f);\n"); + out.Write("zCoord = frac(zCoord);\n"); + out.Write("zCoord = zCoord * (16777216.0f/16777215.0f);\n"); } - WRITE(p, "depth = zCoord;\n"); + out.Write("depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) - WRITE(p, " ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); + out.Write(" ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); else { - WriteFog(p); - WRITE(p, " ocol0 = prev;\n"); + WriteFog(out); + out.Write(" ocol0 = prev;\n"); } // On D3D11, use dual-source color blending to perform dst alpha in a @@ -741,17 +543,16 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) { // Colors will be blended against the alpha from ocol1... - WRITE(p, " ocol1 = ocol0;\n"); + out.Write(" ocol1 = ocol0;\n"); // ...and the alpha from ocol0 will be written to the framebuffer. - WRITE(p, " ocol0.a = " I_ALPHA"[0].a;\n"); + out.Write(" ocol0.a = " I_ALPHA"[0].a;\n"); } - WRITE(p, "}\n"); - if (text[sizeof(text) - 1] != 0x7C) - PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); + out.Write("}\n"); +/// if (text[sizeof(text) - 1] != 0x7C) +/// PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); setlocale(LC_NUMERIC, ""); // restore locale - return text; } @@ -800,7 +601,8 @@ static const char *TEVCMPAlphaOPTable[16] = }; -static void WriteStage(char *&p, int n, API_TYPE ApiType) +template +static void WriteStage(T& out, int n, API_TYPE ApiType) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; @@ -810,25 +612,25 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) if (!bHasTexCoord) texcoord = 0; - WRITE(p, "// TEV stage %d\n", n); + out.Write("// TEV stage %d\n", n); if (bHasIndStage) { - WRITE(p, "// indirect op\n"); + out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) { - WRITE(p, "alphabump = indtex%d.%s %s;\n", + out.Write("alphabump = indtex%d.%s %s;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); } // format - WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); + out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias if (bpmem.tevind[n].bias != ITB_NONE ) - WRITE(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale if (bpmem.tevind[n].mid != 0) @@ -836,26 +638,26 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) if (bpmem.tevind[n].mid <= 3) { int mtxidx = 2*(bpmem.tevind[n].mid-1); - WRITE(p, "float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", + out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", n, mtxidx, n, mtxidx+1, n); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); - WRITE(p, "float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); - WRITE(p, "float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); } else - WRITE(p, "float2 indtevtrans%d = 0;\n", n); + out.Write("float2 indtevtrans%d = 0;\n", n); } else - WRITE(p, "float2 indtevtrans%d = 0;\n", n); + out.Write("float2 indtevtrans%d = 0;\n", n); // --------- // Wrapping @@ -863,24 +665,24 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) // wrap S if (bpmem.tevind[n].sw == ITW_OFF) - WRITE(p, "wrappedcoord.x = uv%d.x;\n", texcoord); + out.Write("wrappedcoord.x = uv%d.x;\n", texcoord); else if (bpmem.tevind[n].sw == ITW_0) - WRITE(p, "wrappedcoord.x = 0.0f;\n"); + out.Write("wrappedcoord.x = 0.0f;\n"); else - WRITE(p, "wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); // wrap T if (bpmem.tevind[n].tw == ITW_OFF) - WRITE(p, "wrappedcoord.y = uv%d.y;\n", texcoord); + out.Write("wrappedcoord.y = uv%d.y;\n", texcoord); else if (bpmem.tevind[n].tw == ITW_0) - WRITE(p, "wrappedcoord.y = 0.0f;\n"); + out.Write("wrappedcoord.y = 0.0f;\n"); else - WRITE(p, "wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); if (bpmem.tevind[n].fb_addprev) // add previous tevcoord - WRITE(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); + out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); else - WRITE(p, "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); + out.Write("tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); } TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; @@ -894,8 +696,8 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); - WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); + out.Write("crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); } @@ -905,17 +707,17 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { // calc tevcord if(bHasTexCoord) - WRITE(p, "tevcoord.xy = uv%d.xy;\n", texcoord); + out.Write("tevcoord.xy = uv%d.xy;\n", texcoord); else - WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n"); + out.Write("tevcoord.xy = float2(0.0f, 0.0f);\n"); } char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); - SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType); + SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); } else - WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + out.Write("textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST @@ -923,14 +725,14 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { int kc = bpmem.tevksel[n / 2].getKC(n & 1); int ka = bpmem.tevksel[n / 2].getKA(n & 1); - WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); if(kc > 7 || ka > 7) { - WRITE(p, "ckonsttemp = frac(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("ckonsttemp = frac(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); } else { - WRITE(p, "ckonsttemp = konsttemp;\n"); + out.Write("ckonsttemp = konsttemp;\n"); } } @@ -941,13 +743,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) { - WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[0].AlphaNeedOverflowControl = false; RegisterStates[0].ColorNeedOverflowControl = false; } else { - WRITE(p, "cprev = prev;\n"); + out.Write("cprev = prev;\n"); } RegisterStates[0].AuxStored = true; } @@ -959,13 +761,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) { - WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[1].AlphaNeedOverflowControl = false; RegisterStates[1].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc0 = c0;\n"); + out.Write("cc0 = c0;\n"); } RegisterStates[1].AuxStored = true; } @@ -977,13 +779,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl) { - WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[2].AlphaNeedOverflowControl = false; RegisterStates[2].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc1 = c1;\n"); + out.Write("cc1 = c1;\n"); } RegisterStates[2].AuxStored = true; } @@ -995,13 +797,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) { - WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[3].AlphaNeedOverflowControl = false; RegisterStates[3].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc2 = c2;\n"); + out.Write("cc2 = c2;\n"); } RegisterStates[3].AuxStored = true; } @@ -1009,117 +811,116 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0); RegisterStates[cc.dest].AuxStored = false; - // combine the color channel - WRITE(p, "// color combine\n"); + out.Write("// color combine\n"); if (cc.clamp) - WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]); + out.Write("%s = saturate(", tevCOutputTable[cc.dest]); else - WRITE(p, "%s = ", tevCOutputTable[cc.dest]); + out.Write("%s = ", tevCOutputTable[cc.dest]); // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare { //normal color combiner goes here if (cc.shift > TEVSCALE_1) - WRITE(p, "%s*(", tevScaleTable[cc.shift]); + out.Write("%s*(", tevScaleTable[cc.shift]); if(!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) - WRITE(p, "%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); + out.Write("%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); if (cc.a == cc.b) - WRITE(p, "%s", tevCInputTable[cc.a + 16]); + out.Write("%s", tevCInputTable[cc.a + 16]); else if (cc.c == TEVCOLORARG_ZERO) - WRITE(p, "%s", tevCInputTable[cc.a + 16]); + out.Write("%s", tevCInputTable[cc.a + 16]); else if (cc.c == TEVCOLORARG_ONE) - WRITE(p, "%s", tevCInputTable[cc.b + 16]); + out.Write("%s", tevCInputTable[cc.b + 16]); else if (cc.a == TEVCOLORARG_ZERO) - WRITE(p, "%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); else if (cc.b == TEVCOLORARG_ZERO) - WRITE(p, "%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); + out.Write("%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); else - WRITE(p, "lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); - WRITE(p, "%s", tevBiasTable[cc.bias]); + out.Write("%s", tevBiasTable[cc.bias]); if (cc.shift > TEVSCALE_1) - WRITE(p, ")"); + out.Write(")"); } else { int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here - WRITE(p, TEVCMPColorOPTable[cmp],//lookup the function from the op table + out.Write(TEVCMPColorOPTable[cmp],//lookup the function from the op table tevCInputTable[cc.d], tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); } if (cc.clamp) - WRITE(p, ")"); - WRITE(p,";\n"); + out.Write(")"); + out.Write(";\n"); RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0); RegisterStates[ac.dest].AuxStored = false; - // combine the alpha channel - WRITE(p, "// alpha combine\n"); + out.Write("// alpha combine\n"); if (ac.clamp) - WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]); + out.Write("%s = saturate(", tevAOutputTable[ac.dest]); else - WRITE(p, "%s = ", tevAOutputTable[ac.dest]); + out.Write("%s = ", tevAOutputTable[ac.dest]); if (ac.bias != TevBias_COMPARE) // if not compare { //normal alpha combiner goes here if (ac.shift > TEVSCALE_1) - WRITE(p, "%s*(", tevScaleTable[ac.shift]); + out.Write("%s*(", tevScaleTable[ac.shift]); if(!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) - WRITE(p, "%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); + out.Write("%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); if (ac.a == ac.b) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + out.Write("%s.a", tevAInputTable[ac.a + 8]); else if (ac.c == TEVALPHAARG_ZERO) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + out.Write("%s.a", tevAInputTable[ac.a + 8]); else if (ac.a == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); else if (ac.b == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); + out.Write("%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); else - WRITE(p, "lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); - WRITE(p, "%s",tevBiasTable[ac.bias]); + out.Write("%s",tevBiasTable[ac.bias]); if (ac.shift>0) - WRITE(p, ")"); + out.Write(")"); } else { //compare alpha combiner goes here int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here - WRITE(p, TEVCMPAlphaOPTable[cmp], + out.Write(TEVCMPAlphaOPTable[cmp], tevAInputTable[ac.d], tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); } if (ac.clamp) - WRITE(p, ")"); - WRITE(p, ";\n\n"); - WRITE(p, "// TEV done\n"); + out.Write(")"); + out.Write(";\n\n"); + out.Write("// TEV done\n"); } -void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) +template +void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) { if (ApiType == API_D3D11) - WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); + out.Write("%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); else - WRITE(p, "%s=tex2D(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap); + out.Write("%s=tex2D(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap); } static const char *tevAlphaFuncsTable[] = { - "(false)", //ALPHACMP_NEVER 0 + "(false)", //ALPHACMP_NEVER 0, TODO: Not safe? "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1 "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2 "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3 @@ -1137,7 +938,8 @@ static const char *tevAlphaFunclogicTable[] = " == " // xnor }; -static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode) +template +static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode) { static const char *alphaRef[2] = { @@ -1146,21 +948,21 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode }; // using discard then return works the same in cg and dx9 but not in dx11 - WRITE(p, "if(!( "); + out.Write("if(!( "); int compindex = bpmem.alpha_test.comp0; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table + out.Write(tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table - WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op + out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op compindex = bpmem.alpha_test.comp1; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table - WRITE(p, ")) {\n"); + out.Write(tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table + out.Write(")) {\n"); - WRITE(p, "ocol0 = 0;\n"); + out.Write("ocol0 = 0;\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) - WRITE(p, "ocol1 = 0;\n"); - WRITE(p, "depth = 1.f;\n"); + out.Write("ocol1 = 0;\n"); + out.Write("depth = 1.f;\n"); // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before // or after texturing and alpha test. PC GPUs have no way to support this @@ -1174,12 +976,12 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode // we don't have a choice. if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) { - WRITE(p, "discard;\n"); + out.Write("discard;\n"); if (ApiType != API_D3D11) - WRITE(p, "return;\n"); + out.Write("return;\n"); } - WRITE(p, "}\n"); + out.Write("}\n"); } static const char *tevFogFuncsTable[] = @@ -1194,38 +996,40 @@ static const char *tevFogFuncsTable[] = " fog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" //backward exp2 }; -static void WriteFog(char *&p) +template +static void WriteFog(T& out) { - if(bpmem.fog.c_proj_fsel.fsel == 0)return;//no Fog + if(bpmem.fog.c_proj_fsel.fsel == 0) + return; //no Fog if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective // ze = A/(B - (Zs >> B_SHF) - WRITE (p, " float ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n"); + out.Write(" float ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - WRITE (p, " float ze = " I_FOG"[1].x * zCoord;\n"); + out.Write(" float ze = " I_FOG"[1].x * zCoord;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k // ze *= x_adjust - //this is complitly teorical as the real hard seems to use a table intead of calculate the values. + // this is completely theoretical as the real hardware seems to use a table intead of calculating the values. if(bpmem.fogRange.Base.Enabled) { - WRITE (p, " float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); - WRITE (p, " x_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); - WRITE (p, " ze *= x_adjust;\n"); + out.Write(" float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); + out.Write(" x_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); + out.Write(" ze *= x_adjust;\n"); } - WRITE (p, " float fog = saturate(ze - " I_FOG"[1].z);\n"); + out.Write("float fog = saturate(ze - " I_FOG"[1].z);\n"); if(bpmem.fog.c_proj_fsel.fsel > 3) { - WRITE(p, "%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); + out.Write("%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); } else { @@ -1233,7 +1037,15 @@ static void WriteFog(char *&p) WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); } - WRITE(p, " prev.rgb = lerp(prev.rgb," I_FOG"[0].rgb,fog);\n"); - - + out.Write(" prev.rgb = lerp(prev.rgb," I_FOG"[0].rgb,fog);\n"); +} + +void GetPixelShaderId(PixelShaderUid& object, DSTALPHA_MODE dst_alpha_mode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dst_alpha_mode, ApiType, components); +} + +void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dst_alpha_mode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dst_alpha_mode, ApiType, components); } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 9c8bfce256..61be98aa6e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -19,6 +19,7 @@ #define GCOGL_PIXELSHADER_H #include "VideoCommon.h" +#include "ShaderGenCommon.h" #define I_COLORS "color" #define I_KCOLORS "k" @@ -44,66 +45,6 @@ #define C_PLIGHTS (C_FOG + 3) #define C_PMATERIALS (C_PLIGHTS + 40) #define C_PENVCONST_END (C_PMATERIALS + 4) -#define PIXELSHADERUID_MAX_VALUES 70 -#define PIXELSHADERUID_MAX_VALUES_SAFE 115 - -// DO NOT make anything in this class virtual. -template -class _PIXELSHADERUID -{ -public: - u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES]; - int num_values; - - _PIXELSHADERUID() - { - } - - _PIXELSHADERUID(const _PIXELSHADERUID& r) - { - num_values = r.num_values; - if (safe) memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE); - else memcpy(values, r.values, r.GetNumValues() * sizeof(values[0])); - } - - int GetNumValues() const - { - if (safe) return (sizeof(values) / sizeof(u32)); - else return num_values; - } - - bool operator <(const _PIXELSHADERUID& _Right) const - { - int N = GetNumValues(); - if (N < _Right.GetNumValues()) - return true; - else if (N > _Right.GetNumValues()) - return false; - for (int i = 0; i < N; ++i) - { - if (values[i] < _Right.values[i]) - return true; - else if (values[i] > _Right.values[i]) - return false; - } - return false; - } - - bool operator ==(const _PIXELSHADERUID& _Right) const - { - int N = GetNumValues(); - if (N != _Right.GetNumValues()) - return false; - for (int i = 0; i < N; ++i) - { - if (values[i] != _Right.values[i]) - return false; - } - return true; - } -}; -typedef _PIXELSHADERUID PIXELSHADERUID; -typedef _PIXELSHADERUID PIXELSHADERUIDSAFE; // Different ways to achieve rendering with destination alpha enum DSTALPHA_MODE @@ -113,12 +54,53 @@ enum DSTALPHA_MODE DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending }; -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +enum ALPHA_PRETEST_RESULT +{ + ALPHAPT_UNDEFINED, // AlphaTest Result is not defined + ALPHAPT_ALWAYSFAIL, // Alpha test alway Fail + ALPHAPT_ALWAYSPASS // Alpha test alway Pass +}; -void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components); -void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components); +struct pixel_shader_uid_data +{ + u32 components; + DSTALPHA_MODE dstAlphaMode; // TODO: as u32 :2 + ALPHA_PRETEST_RESULT Pretest; // TODO: As :2 + u32 nIndirectStagesUsed : 8; + struct { + u32 numtexgens : 4; + u32 numtevstages : 4; + u32 numindstages : 3; + } genMode; + u32 fogc_proj_fselfsel : 3; + struct + { + u32 unknown : 1; + u32 projection : 1; // XF_TEXPROJ_X + u32 inputform : 2; // XF_TEXINPUT_X + u32 texgentype : 3; // XF_TEXGEN_X + u32 sourcerow : 5; // XF_SRCGEOM_X + u32 embosssourceshift : 3; // what generated texcoord to use + u32 embosslightshift : 3; // light index that is used + } texMtxInfo[8]; + struct + { + u32 bi0 : 3; // indirect tex stage 0 ntexmap + u32 bc0 : 3; // indirect tex stage 0 ntexcoord + u32 bi1 : 3; + u32 bc1 : 3; + u32 bi2 : 3; + u32 bc3 : 3; + u32 bi4 : 3; + u32 bc4 : 3; + } tevindref; +}; -// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes -void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components); +typedef ShaderUid PixelShaderUid; +typedef ShaderCode PixelShaderCode; + + +void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +void GetPixelShaderId(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); #endif // GCOGL_PIXELSHADER_H diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 28fb8e8956..090fb24b57 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -44,7 +44,7 @@ public: // TODO: Store last frame used and order by that? makes much more sense anyway... bool operator < (const ShaderUid& obj) const { - for (int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) + for (unsigned int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) { if (this->values[i] < obj.values[i]) return true; @@ -91,4 +91,10 @@ private: char* write_ptr; }; +enum GenOutput +{ + GO_ShaderCode, + GO_ShaderUid, +}; + #endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index acc89c6a54..db4946a49f 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -28,11 +28,6 @@ static char text[16768]; -enum GenOutput -{ - GO_ShaderCode, - GO_ShaderUid, -}; // TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? template void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ deleted file mode 100644 index be57103918..0000000000 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ +++ /dev/null @@ -1,645 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include -#include - -#include "NativeVertexFormat.h" - -#include "BPMemory.h" -#include "CPMemory.h" -#include "LightingShaderGen.h" -#include "VertexShaderGen.h" -#include "VideoConfig.h" - -static char text[16768]; - -enum GenOutput -{ - GO_ShaderCode, - GO_ShaderUid, -}; -// TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? -template -void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) -{ - object.Write("struct VS_OUTPUT {\n"); - object.Write(" float4 pos : POSITION;\n"); - object.Write(" float4 colors_0 : COLOR0;\n"); - object.Write(" float4 colors_1 : COLOR1;\n"); - - if (xfregs.numTexGen.numTexGens < 7) - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - object.Write(" float3 tex%d : TEXCOORD%d;\n", i, i); - - object.Write(" float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens); -/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) -/// object.Write(" float4 Normal : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens + 1); - } - else - { - // clip position is in w of first 4 texcoords -/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) -/// { -/// for (int i = 0; i < 8; ++i) -/// object.Write(" float4 tex%d : TEXCOORD%d;\n", i, i); -/// } -/// else - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - object.Write(" float%d tex%d : TEXCOORD%d;\n", i < 4 ? 4 : 3 , i, i); - } - } - object.Write("};\n"); -} - -template -void _GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) -{ -#define SetUidField(name, value) if (type == GO_ShaderUid) { object.GetUidData().name = value; }; - const LitChannel& chan = (litchan_index > 1) ? xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; - const char* swizzle = "xyzw"; - if (coloralpha == 1 ) swizzle = "xyz"; - else if (coloralpha == 2 ) swizzle = "w"; - - SetUidField(lit_chans[litchan_index].attnfunc, chan.attnfunc); - SetUidField(lit_chans[litchan_index].diffusefunc, chan.diffusefunc); - if (!(chan.attnfunc & 1)) { - // atten disabled - switch (chan.diffusefunc) { - case LIGHTDIF_NONE: - object.Write("lacc.%s += %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - object.Write("ldir = normalize(%s.lights[%d].pos.xyz - pos.xyz);\n", lightsName, index); - object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index, swizzle); - break; - default: _assert_(0); - } - } - else { // spec and spot - - if (chan.attnfunc == 3) - { // spot - object.Write("ldir = %s.lights[%d].pos.xyz - pos.xyz;\n", lightsName, index); - object.Write("dist2 = dot(ldir, ldir);\n" - "dist = sqrt(dist2);\n" - "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s.lights[%d].dir.xyz));\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); - } - else if (chan.attnfunc == 1) - { // specular - object.Write("ldir = normalize(%s.lights[%d].pos.xyz);\n", lightsName, index); - object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s.lights[%d].dir.xyz)) : 0.0f;\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); - } - - switch (chan.diffusefunc) - { - case LIGHTDIF_NONE: - object.Write("lacc.%s += attn * %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, - chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", - lightsName, - index, - swizzle); - break; - default: _assert_(0); - } - } - object.Write("\n"); -} - -// vertex shader -// lights/colors -// materials name is I_MATERIALS in vs and I_PMATERIALS in ps -// inColorName is color in vs and colors_ in ps -// dest is o.colors_ in vs and colors_ in ps -template -void _GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) -{ - for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) - { - const LitChannel& color = xfregs.color[j]; - const LitChannel& alpha = xfregs.alpha[j]; - - object.Write("{\n"); - - SetUidField(lit_chans[j].matsource, xfregs.color[j].matsource); - if (color.matsource) {// from vertex - if (components & (VB_HAS_COL0 << j)) - object.Write("mat = %s%d;\n", inColorName, j); - else if (components & VB_HAS_COL0) - object.Write("mat = %s0;\n", inColorName); - else - object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - else // from color - object.Write("mat = %s.C%d;\n", materialsName, j+2); - - SetUidField(lit_chans[j].enablelighting, xfregs.color[j].enablelighting); - if (color.enablelighting) { - SetUidField(lit_chans[j].ambsource, xfregs.color[j].ambsource); - if (color.ambsource) { // from vertex - if (components & (VB_HAS_COL0<(object, i, j, lightsName, 3); - } - } - } - } - - // no shared lights - for (int i = 0; i < 8; ++i) - { - if (!(mask&(1<(object, i, j, lightsName, 1); - if (!(mask&(1<(object, i, j+2, lightsName, 2); - } - } - else if (color.enablelighting || alpha.enablelighting) - { - // lights are disabled on one channel so process only the active ones - const LitChannel& workingchannel = color.enablelighting ? color : alpha; - const int lit_index = color.enablelighting ? j : (j+2); - int coloralpha = color.enablelighting ? 1 : 2; - - SetUidField(lit_chans[lit_index].light_mask, workingchannel.GetFullLightMask()); - for (int i = 0; i < 8; ++i) - { - if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); - } - } - object.Write("%s%d = mat * saturate(lacc);\n", dest, j); - object.Write("}\n"); - } -} - -// TODO: Problem: this one uses copy constructors or sth for uids when returning... -template -void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) -{ -#undef SetUidField -#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; - - if (type == GO_ShaderCode) - { - out.SetBuffer(text); - setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - } - - /// text[sizeof(text) - 1] = 0x7C; // canary - - bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); - u32 lightMask = 0; - if (xfregs.numChan.numColorChans > 0) - lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask(); - if (xfregs.numChan.numColorChans > 1) - lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); - - out.Write("//Vertex Shader: comp:%x, \n", components); - out.Write("typedef struct { float4 T0, T1, T2; float4 N0, N1, N2; } s_" I_POSNORMALMATRIX";\n" - "typedef struct { float4 t; } FLT4;\n" - "typedef struct { FLT4 T[24]; } s_" I_TEXMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_" I_TRANSFORMMATRICES";\n" - "typedef struct { FLT4 T[32]; } s_" I_NORMALMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_" I_POSTTRANSFORMMATRICES";\n" - "typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n" - "typedef struct { Light lights[8]; } s_" I_LIGHTS";\n" - "typedef struct { float4 C0, C1, C2, C3; } s_" I_MATERIALS";\n" - "typedef struct { float4 T0, T1, T2, T3; } s_" I_PROJECTION";\n" - ); - -/// p = GenerateVSOutputStruct(p, components, api_type); - GenerateVSOutputStruct(out, components, api_type); - - // uniforms - - out.Write("uniform s_" I_TRANSFORMMATRICES" " I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES); - out.Write("uniform s_" I_TEXMATRICES" " I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); - out.Write("uniform s_" I_NORMALMATRICES" " I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES); - out.Write("uniform s_" I_POSNORMALMATRIX" " I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX); - out.Write("uniform s_" I_POSTTRANSFORMMATRICES" " I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES); - out.Write("uniform s_" I_LIGHTS" " I_LIGHTS" : register(c%d);\n", C_LIGHTS); - out.Write("uniform s_" I_MATERIALS" " I_MATERIALS" : register(c%d);\n", C_MATERIALS); - out.Write("uniform s_" I_PROJECTION" " I_PROJECTION" : register(c%d);\n", C_PROJECTION); - out.Write("uniform float4 " I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS); - - out.Write("VS_OUTPUT main(\n"); - - SetUidField(numTexGens, xfregs.numTexGen.numTexGens); - SetUidField(components, components); - // inputs - if (components & VB_HAS_NRM0) - out.Write(" float3 rawnorm0 : NORMAL0,\n"); - if (components & VB_HAS_NRM1) - { - if (is_d3d) - out.Write(" float3 rawnorm1 : NORMAL1,\n"); - else - out.Write(" float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); - } - if (components & VB_HAS_NRM2) - { - if (is_d3d) - out.Write(" float3 rawnorm2 : NORMAL2,\n"); - else - out.Write(" float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); - } - if (components & VB_HAS_COL0) - out.Write(" float4 color0 : COLOR0,\n"); - if (components & VB_HAS_COL1) - out.Write(" float4 color1 : COLOR1,\n"); - for (int i = 0; i < 8; ++i) { - u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<= 32 ? (posmtx-32) : posmtx;\n"); - out.Write("float3 N0 = " I_NORMALMATRICES".T[normidx].t.xyz, N1 = " I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = " I_NORMALMATRICES".T[normidx+2].t.xyz;\n"); - } - - if (components & VB_HAS_NRM0) - out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); - if (components & VB_HAS_NRM1) - out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); - if (components & VB_HAS_NRM2) - out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); - } - else - { - out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX".T0, rawpos), dot(" I_POSNORMALMATRIX".T1, rawpos), dot(" I_POSNORMALMATRIX".T2, rawpos), 1.0f);\n"); - if (components & VB_HAS_NRM0) - out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm0)));\n"); - if (components & VB_HAS_NRM1) - out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n"); - if (components & VB_HAS_NRM2) - out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n"); - } - - if (!(components & VB_HAS_NRM0)) - out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); - - - - out.Write("o.pos = float4(dot(" I_PROJECTION".T0, pos), dot(" I_PROJECTION".T1, pos), dot(" I_PROJECTION".T2, pos), dot(" I_PROJECTION".T3, pos));\n"); - - out.Write("float4 mat, lacc;\n" - "float3 ldir, h;\n" - "float dist, dist2, attn;\n"); - - SetUidField(numColorChans, xfregs.numChan.numColorChans); - if(xfregs.numChan.numColorChans == 0) - { - if (components & VB_HAS_COL0) - out.Write("o.colors_0 = color0;\n"); - else - out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - - // TODO: This probably isn't necessary if pixel lighting is enabled. - _GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); - - if(xfregs.numChan.numColorChans < 2) - { - if (components & VB_HAS_COL1) - out.Write("o.colors_1 = color1;\n"); - else - out.Write("o.colors_1 = o.colors_0;\n"); - } - // special case if only pos and tex coord 0 and tex coord input is AB11 - // donko - this has caused problems in some games. removed for now. - bool texGenSpecialCase = false; - /*bool texGenSpecialCase = - ((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0 - (g_VtxDesc.Tex0Coord != NOT_PRESENT) && - (xfregs.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); - */ - - // transform texcoords - out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; - - out.Write("{\n"); - out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - SetUidField(texMtxInfo[i].sourcerow, xfregs.texMtxInfo[i].sourcerow); - switch (texinfo.sourcerow) { - case XF_SRCGEOM_INROW: - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = rawpos;\n"); // pos.w is 1 - break; - case XF_SRCNORMAL_INROW: - if (components & VB_HAS_NRM0) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n"); - } - break; - case XF_SRCCOLORS_INROW: - _assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 ); - break; - case XF_SRCBINORMAL_T_INROW: - if (components & VB_HAS_NRM1) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n"); - } - break; - case XF_SRCBINORMAL_B_INROW: - if (components & VB_HAS_NRM2) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n"); - } - break; - default: - _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); - if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) - out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); - break; - } - - // first transformation - SetUidField(texMtxInfo[i].texgentype, xfregs.texMtxInfo[i].texgentype); - switch (texinfo.texgentype) { - case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map - - if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { - // transform the light dir into tangent space - SetUidField(texMtxInfo[i].embosslightshift, xfregs.texMtxInfo[i].embosslightshift); - SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); - out.Write("ldir = normalize(" I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); - out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); - } - else - { - _assert_(0); // should have normals - SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); - out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); - } - - break; - case XF_TEXGEN_COLOR_STRGBC0: - _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); - break; - case XF_TEXGEN_COLOR_STRGBC1: - _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); - break; - case XF_TEXGEN_REGULAR: - default: - SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); - if (components & (VB_HAS_TEXMTXIDX0<(object, components, api_type); -} - -void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) -{ - GenerateVertexShader(object, components, api_type); -} diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index 32d3eff91b..e2c96b972a 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -51,7 +51,7 @@ #define C_VENVCONST_END (C_DEPTHPARAMS + 4) // TODO: Need packing? -struct uid_data +struct vertex_shader_uid_data { u32 components; u32 numColorChans : 2; @@ -82,8 +82,8 @@ struct uid_data } lit_chans[4]; }; -typedef ShaderUid VertexShaderUid; -typedef ShaderCode VertexShaderCode; +typedef ShaderUid VertexShaderUid; +typedef ShaderCode VertexShaderCode; void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h~ b/Source/Core/VideoCommon/Src/VertexShaderGen.h~ index 050ed76649..32d3eff91b 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h~ +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h~ @@ -82,7 +82,7 @@ struct uid_data } lit_chans[4]; }; -typedef ShaderUid VertexShaderUid; +typedef ShaderUid VertexShaderUid; typedef ShaderCode VertexShaderCode; void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index 4364bea6ea..7fef0cac3c 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -144,7 +144,7 @@ struct VideoConfig int iAdapter; // Debugging - bool bEnableShaderDebugging; + bool bEnableShaderDebugging; // TODO: Obsolete? // Static config per API // TODO: Move this out of VideoConfig diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index 61e41008b3..cdd5d6e044 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -39,13 +39,13 @@ static int s_nMaxPixelInstructions; static GLuint s_ColorMatrixProgram = 0; static GLuint s_DepthMatrixProgram = 0; PixelShaderCache::PSCache PixelShaderCache::PixelShaders; -PIXELSHADERUID PixelShaderCache::s_curuid; +PixelShaderUid PixelShaderCache::s_curuid; bool PixelShaderCache::s_displayCompileAlert; GLuint PixelShaderCache::CurrentShader; bool PixelShaderCache::ShaderEnabled; PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry = NULL; -PIXELSHADERUID PixelShaderCache::last_uid; +PixelShaderUid PixelShaderCache::last_uid; GLuint PixelShaderCache::GetDepthMatrixProgram() { @@ -183,16 +183,15 @@ void PixelShaderCache::Shutdown() FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { - PIXELSHADERUID uid; - GetPixelShaderId(&uid, dstAlphaMode, components); - + PixelShaderUid uid; + GetPixelShaderId(uid, dstAlphaMode, API_OPENGL, components); + // Check if the shader is already set if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(API_OPENGL, last_entry->safe_uid, last_entry->shader.strprog, dstAlphaMode, components); return &last_entry->shader; } } @@ -206,19 +205,18 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, dstAlphaMode, components); return &last_entry->shader; } // Make an entry in the table PSCacheEntry& newentry = PixelShaders[uid]; last_entry = &newentry; - const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components); + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_OPENGL, components); - if (g_ActiveConfig.bEnableShaderDebugging && code) + if (g_ActiveConfig.bEnableShaderDebugging) { - GetSafePixelShaderId(&newentry.safe_uid, dstAlphaMode, components); - newentry.shader.strprog = code; + newentry.shader.strprog = code.GetBuffer(); } #if defined(_DEBUG) || defined(DEBUGFAST) @@ -227,11 +225,11 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp char szTemp[MAX_PATH]; sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(szTemp, code); + SaveData(szTemp, code.GetBuffer()); /// XXX } #endif - if (!code || !CompilePixelShader(newentry.shader, code)) { + if (!code.GetBuffer() || !CompilePixelShader(newentry.shader, code.GetBuffer())) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return NULL; } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h index f528eeb35b..aff1b49fcb 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h @@ -53,20 +53,19 @@ class PixelShaderCache { shader.Destroy(); } - PIXELSHADERUIDSAFE safe_uid; }; - typedef std::map PSCache; + typedef std::map PSCache; static PSCache PixelShaders; - static PIXELSHADERUID s_curuid; // the current pixel shader uid (progressively changed as memory is written) + static PixelShaderUid s_curuid; // the current pixel shader uid (progressively changed as memory is written) static bool s_displayCompileAlert; static GLuint CurrentShader; static PSCacheEntry* last_entry; - static PIXELSHADERUID last_uid; + static PixelShaderUid last_uid; static bool ShaderEnabled;