diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 52a4477390..f52c6b4190 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -1,1242 +1,1242 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include -#include -#include -#include - -#include "LightingShaderGen.h" -#include "PixelShaderGen.h" -#include "XFMemory.h" // for texture projection mode -#include "BPMemory.h" -#include "VideoConfig.h" -#include "NativeVertexFormat.h" - -PIXELSHADERUID last_pixel_shader_uid; - -static int AlphaPreTest(); - -static void StageHash(int stage, u32* out) -{ - out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24 - u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now - out[0] |= (alphaC&0xF0) << 24; // 8 - out[1] |= alphaC >> 8; // 16 - - // reserve 3 bits for bpmem.tevorders[stage/2].getTexMap - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3 - out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1 - // reserve 3 bits for bpmem.tevorders[stage/2].getColorChan - - bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages; - out[2] |= bHasIndStage << 2; // 1 - - bool needstexcoord = false; - - if (bHasIndStage) - { - out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation - needstexcoord = true; - } - - - TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC; - TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC; - - if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC - || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC - || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC - || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC - || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA - || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) - { - out[0] |= bpmem.combiners[stage].alphaC.rswap; - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2 - out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23; - out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1; - } - - out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1); - if (bpmem.tevorders[stage/2].getEnable(stage&1)) - { - if (bHasIndStage) needstexcoord = true; - - out[0] |= bpmem.combiners[stage].alphaC.tswap; - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2 - out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16; - } - - if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST - || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) - { - out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5 - out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5 - } - - if (needstexcoord) - { - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16; - } -} - -// Mash together all the inputs that contribute to the code of a generated pixel shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... -// It would likely be a lot more efficient to build this incrementally as the attributes -// are set... -void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode) -{ - uid->values[0] |= bpmem.genMode.numtevstages; // 4 - uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 - uid->values[0] |= dstAlphaMode << 8; // 2 - - bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth; - - uid->values[0] |= DepthTextureEnable << 10; // 1 - - bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - uid->values[0] |= enablePL << 11; // 1 - - if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4 - u32 alphaPreTest = AlphaPreTest()+1; - - uid->values[0] |= alphaPreTest << 16; // 2 - - if (alphaPreTest == 1 || (alphaPreTest && !DepthTextureEnable && dstAlphaMode == DSTALPHA_ALPHA_PASS)) - { - // Courtesy of PreAlphaTest, we're done already ;) - // NOTE: The comment header of generated shaders depends on the value of bpmem.genmode.numindstages.. shouldnt really bother about that though. - uid->num_values = 1; - return; - } - - for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i) - { - if (18+i < 32) - uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1 - else - uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1 - } - - uid->values[1] = bpmem.genMode.numindstages << 2; // 3 - u32 indirectStagesUsed = 0; - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - indirectStagesUsed |= (1 << bpmem.tevind[i].bt); - - assert(indirectStagesUsed == (indirectStagesUsed & 0xF)); - - uid->values[1] |= indirectStagesUsed << 5; // 4; - - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - { - if (indirectStagesUsed & (1 << i)) - { - uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (9 + 3*i); // 1 - if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) - uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (10 + 3*i); // 2 - } - } - - u32* ptr = &uid->values[2]; - for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) - { - StageHash(i, ptr); - ptr += 4; // max: ptr = &uid->values[66] - } - - ptr[0] |= bpmem.alphaFunc.comp0; // 3 - ptr[0] |= bpmem.alphaFunc.comp1 << 3; // 3 - ptr[0] |= bpmem.alphaFunc.logic << 6; // 2 - - if (alphaPreTest == 0 || alphaPreTest == 2) - { - ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 8; // 3 - if (DepthTextureEnable) - { - ptr[0] |= bpmem.ztex2.op << 11; // 2 - ptr[0] |= bpmem.zcontrol.zcomploc << 13; // 1 - ptr[0] |= bpmem.zmode.testenable << 14; // 1 - ptr[0] |= bpmem.zmode.updateenable << 15; // 1 - } - } - - if (dstAlphaMode != DSTALPHA_ALPHA_PASS) - { - if (bpmem.fog.c_proj_fsel.fsel != 0) - { - ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1 - ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1 - } - } - - ++ptr; - if (enablePL) - ptr += GetLightingShaderId(ptr); - - uid->num_values = ptr - uid->values; -} - -void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode) -{ - u32* ptr = uid->values; - *ptr++ = dstAlphaMode; // 0 - *ptr++ = bpmem.genMode.hex; // 1 - *ptr++ = bpmem.ztex2.hex; // 2 - *ptr++ = bpmem.zcontrol.hex; // 3 - *ptr++ = bpmem.zmode.hex; // 4 - *ptr++ = g_ActiveConfig.bEnablePerPixelDepth; // 5 - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6 - *ptr++ = xfregs.numTexGen.hex; // 7 - - if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - // TODO: Include register states for lighting shader - *ptr++ = xfregs.color[0].hex; - *ptr++ = xfregs.alpha[0].hex; - *ptr++ = xfregs.color[1].hex; - *ptr++ = xfregs.alpha[1].hex; - } - - for (unsigned int i = 0; i < 8; ++i) - *ptr++ = xfregs.texMtxInfo[i].hex; // 8-15 - - for (unsigned int i = 0; i < 16; ++i) - *ptr++ = bpmem.tevind[i].hex; // 16-31 - - *ptr++ = bpmem.tevindref.hex; // 32 - - for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times - { - *ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i - *ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i - *ptr++ = bpmem.tevind[i].hex; // 35+5*i - *ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i - *ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i - } - - ptr = &uid->values[113]; - - *ptr++ = bpmem.alphaFunc.hex; // 113 - - *ptr++ = bpmem.fog.c_proj_fsel.hex; // 114 - *ptr++ = bpmem.fogRange.Base.hex; // 115 - - _assert_((ptr - uid->values) == uid->GetNumValues()); -} - -void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - if (!g_ActiveConfig.bEnableShaderDebugging) - return; - - PIXELSHADERUIDSAFE new_id; - GetSafePixelShaderId(&new_id, dstAlphaMode); - - if (!(old_id == new_id)) - { - std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components)); - if (old_code != new_code) - { - _assert_(old_id.GetNumValues() == new_id.GetNumValues()); - - char msg[8192]; - char* ptr = msg; - ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); - const int N = new_id.GetNumValues(); - for (int i = 0; i < N/2; ++i) - ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], - new_id.values[2*i], new_id.values[2*i+1]); - if (N % 2) - ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); - - static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file(szTemp); - file << msg; - file << "\n\nOld shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code; - file.close(); - - PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); - } - } -} - -// old tev->pixelshader notes -// -// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 -// konstant for this stage (alpha, color) is given by bpmem.tevksel -// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current chan color -// according to GXTevColorArg table above -// output is given by .outreg -// tevtemp is set according to swapmodetables and - -static void WriteStage(char *&p, int n, API_TYPE ApiType); -static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); -// static void WriteAlphaCompare(char *&p, int num, int comp); -static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); -static void WriteFog(char *&p); - -static const char *tevKSelTableC[] = // KCSEL -{ - "1.0f,1.0f,1.0f", // 1 = 0x00 - "0.875f,0.875f,0.875f", // 7_8 = 0x01 - "0.75f,0.75f,0.75f", // 3_4 = 0x02 - "0.625f,0.625f,0.625f", // 5_8 = 0x03 - "0.5f,0.5f,0.5f", // 1_2 = 0x04 - "0.375f,0.375f,0.375f", // 3_8 = 0x05 - "0.25f,0.25f,0.25f", // 1_4 = 0x06 - "0.125f,0.125f,0.125f", // 1_8 = 0x07 - "ERROR", // 0x08 - "ERROR", // 0x09 - "ERROR", // 0x0a - "ERROR", // 0x0b - I_KCOLORS"[0].rgb", // K0 = 0x0C - I_KCOLORS"[1].rgb", // K1 = 0x0D - I_KCOLORS"[2].rgb", // K2 = 0x0E - I_KCOLORS"[3].rgb", // K3 = 0x0F - I_KCOLORS"[0].rrr", // K0_R = 0x10 - I_KCOLORS"[1].rrr", // K1_R = 0x11 - I_KCOLORS"[2].rrr", // K2_R = 0x12 - I_KCOLORS"[3].rrr", // K3_R = 0x13 - I_KCOLORS"[0].ggg", // K0_G = 0x14 - I_KCOLORS"[1].ggg", // K1_G = 0x15 - I_KCOLORS"[2].ggg", // K2_G = 0x16 - I_KCOLORS"[3].ggg", // K3_G = 0x17 - I_KCOLORS"[0].bbb", // K0_B = 0x18 - I_KCOLORS"[1].bbb", // K1_B = 0x19 - I_KCOLORS"[2].bbb", // K2_B = 0x1A - I_KCOLORS"[3].bbb", // K3_B = 0x1B - I_KCOLORS"[0].aaa", // K0_A = 0x1C - I_KCOLORS"[1].aaa", // K1_A = 0x1D - I_KCOLORS"[2].aaa", // K2_A = 0x1E - I_KCOLORS"[3].aaa", // K3_A = 0x1F -}; - -static const char *tevKSelTableA[] = // KASEL -{ - "1.0f", // 1 = 0x00 - "0.875f",// 7_8 = 0x01 - "0.75f", // 3_4 = 0x02 - "0.625f",// 5_8 = 0x03 - "0.5f", // 1_2 = 0x04 - "0.375f",// 3_8 = 0x05 - "0.25f", // 1_4 = 0x06 - "0.125f",// 1_8 = 0x07 - "ERROR", // 0x08 - "ERROR", // 0x09 - "ERROR", // 0x0a - "ERROR", // 0x0b - "ERROR", // 0x0c - "ERROR", // 0x0d - "ERROR", // 0x0e - "ERROR", // 0x0f - I_KCOLORS"[0].r", // K0_R = 0x10 - I_KCOLORS"[1].r", // K1_R = 0x11 - I_KCOLORS"[2].r", // K2_R = 0x12 - I_KCOLORS"[3].r", // K3_R = 0x13 - I_KCOLORS"[0].g", // K0_G = 0x14 - I_KCOLORS"[1].g", // K1_G = 0x15 - I_KCOLORS"[2].g", // K2_G = 0x16 - I_KCOLORS"[3].g", // K3_G = 0x17 - I_KCOLORS"[0].b", // K0_B = 0x18 - I_KCOLORS"[1].b", // K1_B = 0x19 - I_KCOLORS"[2].b", // K2_B = 0x1A - I_KCOLORS"[3].b", // K3_B = 0x1B - I_KCOLORS"[0].a", // K0_A = 0x1C - I_KCOLORS"[1].a", // K1_A = 0x1D - I_KCOLORS"[2].a", // K2_A = 0x1E - I_KCOLORS"[3].a", // K3_A = 0x1F -}; - -static const char *tevScaleTable[] = // CS -{ - "1.0f", // SCALE_1 - "2.0f", // SCALE_2 - "4.0f", // SCALE_4 - "0.5f", // DIVIDE_2 -}; - -static const char *tevBiasTable[] = // TB -{ - "", // ZERO, - "+0.5f", // ADDHALF, - "-0.5f", // SUBHALF, - "", -}; - -static const char *tevOpTable[] = { // TEV - "+", // TEVOP_ADD = 0, - "-", // TEVOP_SUB = 1, -}; - -static const char *tevCInputTable[] = // CC -{ - "(prev.rgb)", // CPREV, - "(prev.aaa)", // APREV, - "(c0.rgb)", // C0, - "(c0.aaa)", // A0, - "(c1.rgb)", // C1, - "(c1.aaa)", // A1, - "(c2.rgb)", // C2, - "(c2.aaa)", // A2, - "(textemp.rgb)", // TEXC, - "(textemp.aaa)", // TEXA, - "(rastemp.rgb)", // RASC, - "(rastemp.aaa)", // RASA, - "float3(1.0f, 1.0f, 1.0f)", // ONE - "float3(0.5f, 0.5f, 0.5f)", // HALF - "(konsttemp.rgb)", //"konsttemp.rgb", // KONST - "float3(0.0f, 0.0f, 0.0f)", // ZERO - ///aded extra values to map clamped values - "(cprev.rgb)", // CPREV, - "(cprev.aaa)", // APREV, - "(cc0.rgb)", // C0, - "(cc0.aaa)", // A0, - "(cc1.rgb)", // C1, - "(cc1.aaa)", // A1, - "(cc2.rgb)", // C2, - "(cc2.aaa)", // A2, - "(textemp.rgb)", // TEXC, - "(textemp.aaa)", // TEXA, - "(crastemp.rgb)", // RASC, - "(crastemp.aaa)", // RASA, - "float3(1.0f, 1.0f, 1.0f)", // ONE - "float3(0.5f, 0.5f, 0.5f)", // HALF - "(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST - "float3(0.0f, 0.0f, 0.0f)", // ZERO - "PADERROR", "PADERROR", "PADERROR", "PADERROR" -}; - -static const char *tevAInputTable[] = // CA -{ - "prev", // APREV, - "c0", // A0, - "c1", // A1, - "c2", // A2, - "textemp", // TEXA, - "rastemp", // RASA, - "konsttemp", // KONST, (hw1 had quarter) - "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO - ///aded extra values to map clamped values - "cprev", // APREV, - "cc0", // A0, - "cc1", // A1, - "cc2", // A2, - "textemp", // TEXA, - "crastemp", // RASA, - "ckonsttemp", // KONST, (hw1 had quarter) - "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", -}; - -static const char *tevRasTable[] = -{ - "colors_0", - "colors_1", - "ERROR", //2 - "ERROR", //3 - "ERROR", //4 - "alphabump", // use bump alpha - "(alphabump*(255.0f/248.0f))", //normalized - "float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero -}; - -//static const char *tevTexFunc[] = { "tex2D", "texRECT" }; - -static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; -static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; -static const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; -//static const char *tevIndAlphaScale[] = {"", "*32", "*16", "*8"}; -static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)", "*(240.0f/255.0f)", "*(248.0f/255.0f)"}; -static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias -static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt -static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" }; -static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" }; - -#define WRITE p+=sprintf - -static char swapModeTable[4][5]; - -static char text[16384]; -static bool DepthTextureEnable; - -static void BuildSwapModeTable() -{ - static const char *swapColors = "rgba"; - for (int i = 0; i < 4; i++) - { - swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1]; - swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2]; - swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1]; - swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2]; - swapModeTable[i][4] = 0; - } -} - -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) -{ - setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - text[sizeof(text) - 1] = 0x7C; // canary - - BuildSwapModeTable(); // Needed for WriteStage - int numStages = bpmem.genMode.numtevstages + 1; - int numTexgen = bpmem.genMode.numtexgens; - - char *p = text; - WRITE(p, "//Pixel Shader for TEV stages\n"); - WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n", - numStages, numTexgen/*, bpmem.genMode.numindstages*/); - - int nIndirectStagesUsed = 0; - if (bpmem.genMode.numindstages > 0) - { - for (int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } - DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ; - // Declare samplers - - if(ApiType != API_D3D11) - { - WRITE(p, "uniform sampler2D "); - } - else - { - WRITE(p, "sampler "); - } - - bool bfirst = true; - for (int i = 0; i < 8; ++i) - { - WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",", i, i); - bfirst = false; - } - WRITE(p, ";\n"); - if(ApiType == API_D3D11) - { - WRITE(p, "Texture2D "); - bfirst = true; - for (int i = 0; i < 8; ++i) - { - WRITE(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i); - bfirst = false; - } - WRITE(p, ";\n"); - } - - WRITE(p, "\n"); - - WRITE(p, "uniform float4 "I_COLORS"[4] : register(c%d);\n", C_COLORS); - WRITE(p, "uniform float4 "I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); - WRITE(p, "uniform float4 "I_ALPHA"[1] : register(c%d);\n", C_ALPHA); - WRITE(p, "uniform float4 "I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); - WRITE(p, "uniform float4 "I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); - WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); - WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); - WRITE(p, "uniform float4 "I_FOG"[3] : register(c%d);\n", C_FOG); - - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n"); - WRITE(p,"typedef struct { Light lights[8]; } s_"I_PLIGHTS";\n"); - WRITE(p, "uniform s_"I_PLIGHTS" "I_PLIGHTS" : register(c%d);\n", C_PLIGHTS); - WRITE(p, "typedef struct { float4 C0, C1, C2, C3; } s_"I_PMATERIALS";\n"); - WRITE(p, "uniform s_"I_PMATERIALS" "I_PMATERIALS" : register(c%d);\n", C_PMATERIALS); - } - - WRITE(p, "void main(\n"); - if(ApiType != API_D3D11) - { - WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", - dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "", - DepthTextureEnable ? "\n out float depth : DEPTH," : "", - ApiType & API_OPENGL ? "WPOS" : ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS"); - } - else - { - WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", - dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", - DepthTextureEnable ? "\n out float depth : SV_Depth," : ""); - } - - WRITE(p, " in float4 colors_0 : COLOR0,\n"); - WRITE(p, " in float4 colors_1 : COLOR1"); - - // compute window position if needed because binding semantic WPOS is not widely supported - if (numTexgen < 7) - { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, ",\n in float3 uv%d : TEXCOORD%d", i, i); - WRITE(p, ",\n in float4 clipPos : TEXCOORD%d", numTexgen); - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, ",\n in float4 Normal : TEXCOORD%d", numTexgen + 1); - } - else - { - // wpos is in w of first 4 texcoords - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - for (int i = 0; i < 8; ++i) - WRITE(p, ",\n in float4 uv%d : TEXCOORD%d", i, i); - } - else - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); - } - } - WRITE(p, " ) {\n"); - - char* pmainstart = p; - int Pretest = AlphaPreTest(); - if (dstAlphaMode == DSTALPHA_ALPHA_PASS && !DepthTextureEnable && Pretest >= 0) - { - if (!Pretest) - { - // alpha test will always fail, so restart the shader and just make it an empty function - WRITE(p, "ocol0 = 0;\n"); - WRITE(p, "discard;\n"); - if(ApiType != API_D3D11) - WRITE(p, "return;\n"); - } - else - { - WRITE(p, " ocol0 = "I_ALPHA"[0].aaaa;\n"); - } - WRITE(p, "}\n"); - return text; - } - - WRITE(p, " float4 c0 = "I_COLORS"[1], c1 = "I_COLORS"[2], c2 = "I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" - " float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n" - " float4 alphabump=float4(0.0f,0.0f,0.0f,0.0f);\n" - " float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n" - " float2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n" - " float4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n" - " float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n" - " float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n"); - - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - if (xfregs.numTexGen.numTexGens < 7) - { - WRITE(p,"float3 _norm0 = normalize(Normal.xyz);\n\n"); - WRITE(p,"float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); - } - else - { - WRITE(p," float3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); - WRITE(p,"float3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); - } - - - WRITE(p, "float4 mat, lacc;\n" - "float3 ldir, h;\n" - "float dist, dist2, attn;\n"); - - p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); - } - - if (numTexgen < 7) - WRITE(p, "clipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); - else - WRITE(p, "float4 clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); - - // HACK to handle cases where the tex gen is not enabled - if (numTexgen == 0) - { - WRITE(p, "float3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); - } - else - { - for (int i = 0; i < numTexgen; ++i) - { - // optional perspective divides - if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) - { - WRITE(p, "if (uv%d.z)", i); - WRITE(p, " uv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); - } - - WRITE(p, "uv%d.xy = uv%d.xy * "I_TEXDIMS"[%d].zw;\n", i, i, i); - } - } - - // indirect texture map lookup - for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) - { - if (nIndirectStagesUsed & (1<= %s.r + (0.25f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_GT 8 - " %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_EQ 9 - " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_GT 10 - " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_EQ 11 - " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_GT 12 - " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_EQ 13 - " %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)), float3(0.0f, 0.0f, 0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14 - " %s + ((float3(1.0f, 1.0f, 1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)), float3(0.0f, 0.0f, 0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15 -}; - -//table with the alpha compare operations -static const char *TEVCMPAlphaOPTable[16] = -{ - "0.0f",//0 - "0.0f",//1 - "0.0f",//2 - "0.0f",//3 - "0.0f",//4 - "0.0f",//5 - "0.0f",//6 - "0.0f",//7 - " %s.a + ((%s.r >= (%s.r + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_R8_GT 8 - " %s.a + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_R8_EQ 9 - " %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_GR16_GT 10 - " %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_GR16_EQ 11 - " %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_GT 12 - " %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_EQ 13 - " %s.a + ((%s.a >= (%s.a + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_A8_GT 14 - " %s.a + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s.a : 0.0f)"//#define TEVCMP_A8_EQ 15 - -}; - - -static void WriteStage(char *&p, int n, API_TYPE ApiType) -{ - int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); - bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; - bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages; - - // HACK to handle cases where the tex gen is not enabled - if (!bHasTexCoord) - texcoord = 0; - - WRITE(p, "// TEV stage %d\n", n); - - if (bHasIndStage) - { - WRITE(p, "// indirect op\n"); - // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords - if (bpmem.tevind[n].bs != ITBA_OFF) - { - WRITE(p, "alphabump = indtex%d.%s %s;\n", - bpmem.tevind[n].bt, - tevIndAlphaSel[bpmem.tevind[n].bs], - tevIndAlphaScale[bpmem.tevind[n].fmt]); - } - // format - WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); - - // bias - if (bpmem.tevind[n].bias != ITB_NONE ) - WRITE(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); - - // multiply by offset matrix and scale - if (bpmem.tevind[n].mid != 0) - { - if (bpmem.tevind[n].mid <= 3) - { - int mtxidx = 2*(bpmem.tevind[n].mid-1); - WRITE(p, "float2 indtevtrans%d = float2(dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", - n, mtxidx, n, mtxidx+1, n); - } - else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) - { // s matrix - // TODO: Might become negative? - int mtxidx = 2*(bpmem.tevind[n].mid-5); - WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); - } - else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) - { // t matrix - // TODO: Might become negative? - int mtxidx = 2*(bpmem.tevind[n].mid-9); - WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); - } - else - WRITE(p, "float2 indtevtrans%d = 0;\n", n); - } - else - WRITE(p, "float2 indtevtrans%d = 0;\n", n); - - // --------- - // Wrapping - // --------- - - // wrap S - if (bpmem.tevind[n].sw == ITW_OFF) - WRITE(p, "wrappedcoord.x = uv%d.x;\n", texcoord); - else if (bpmem.tevind[n].sw == ITW_0) - WRITE(p, "wrappedcoord.x = 0.0f;\n"); - else - WRITE(p, "wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); - - // wrap T - if (bpmem.tevind[n].tw == ITW_OFF) - WRITE(p, "wrappedcoord.y = uv%d.y;\n", texcoord); - else if (bpmem.tevind[n].tw == ITW_0) - WRITE(p, "wrappedcoord.y = 0.0f;\n"); - else - WRITE(p, "wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); - - if (bpmem.tevind[n].fb_addprev) // add previous tevcoord - WRITE(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); - else - WRITE(p, "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); - } - - TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; - TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; - - // blah1 - if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC - || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC - || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC - || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC - || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA - || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) - { - char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); - WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); - } - - - if (bpmem.tevorders[n/2].getEnable(n&1)) - { - if(!bHasIndStage) - { - // calc tevcord - if(bHasTexCoord) - WRITE(p, "tevcoord.xy = uv%d.xy;\n", texcoord); - else - WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n"); - } - - char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; - int texmap = bpmem.tevorders[n/2].getTexMap(n&1); - SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType); - } - else - WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - - - // blah2 - if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST - || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) - { - int kc = bpmem.tevksel[n / 2].getKC(n & 1); - int ka = bpmem.tevksel[n / 2].getKA(n & 1); - WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); - if(kc > 7 || ka > 7) - { - WRITE(p, "ckonsttemp = frac(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); - } - else - { - WRITE(p, "ckonsttemp = konsttemp;\n"); - } - } - - if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV - || cc.b == TEVCOLORARG_CPREV || cc.b == TEVCOLORARG_APREV - || cc.c == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_APREV - || ac.a == TEVALPHAARG_APREV || ac.b == TEVALPHAARG_APREV || ac.c == TEVALPHAARG_APREV) - WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); - - - if(cc.a == TEVCOLORARG_C0 || cc.a == TEVCOLORARG_A0 - || cc.b == TEVCOLORARG_C0 || cc.b == TEVCOLORARG_A0 - || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0 - || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0) - WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); - - - if(cc.a == TEVCOLORARG_C1 || cc.a == TEVCOLORARG_A1 - || cc.b == TEVCOLORARG_C1 || cc.b == TEVCOLORARG_A1 - || cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1 - || ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1) - WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); - - - if(cc.a == TEVCOLORARG_C2 || cc.a == TEVCOLORARG_A2 - || cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_A2 - || cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2 - || ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2) - WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); - - - WRITE(p, "// color combine\n"); - if (cc.clamp) - WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]); - else - WRITE(p, "%s = ", tevCOutputTable[cc.dest]); - - // combine the color channel - if (cc.bias != TevBias_COMPARE) // if not compare - { - //normal color combiner goes here - if (cc.shift > TEVSCALE_1) - WRITE(p, "%s*(", tevScaleTable[cc.shift]); - - if(!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) - WRITE(p, "%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); - - if (cc.a == cc.b) - WRITE(p, "%s", tevCInputTable[cc.a + 16]); - else if (cc.c == TEVCOLORARG_ZERO) - WRITE(p, "%s", tevCInputTable[cc.a + 16]); - else if (cc.c == TEVCOLORARG_ONE) - WRITE(p, "%s", tevCInputTable[cc.b + 16]); - else if (cc.a == TEVCOLORARG_ZERO) - WRITE(p, "%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); - else if (cc.b == TEVCOLORARG_ZERO) - WRITE(p, "%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); - else - WRITE(p, "lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); - - WRITE(p, "%s", tevBiasTable[cc.bias]); - - if (cc.shift > TEVSCALE_1) - WRITE(p, ")"); - } - else - { - int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here - WRITE(p, TEVCMPColorOPTable[cmp],//lookup the function from the op table - tevCInputTable[cc.d], - tevCInputTable[cc.a + 16], - tevCInputTable[cc.b + 16], - tevCInputTable[cc.c + 16]); - } - if (cc.clamp) - WRITE(p, ")"); - WRITE(p,";\n"); - - WRITE(p, "// alpha combine\n"); - // combine the alpha channel - if (ac.clamp) - WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]); - else - WRITE(p, "%s = ", tevAOutputTable[ac.dest]); - - if (ac.bias != TevBias_COMPARE) // if not compare - { - //normal alpha combiner goes here - if (ac.shift > TEVSCALE_1) - WRITE(p, "%s*(", tevScaleTable[ac.shift]); - - if(!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) - WRITE(p, "%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); - - if (ac.a == ac.b) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); - else if (ac.c == TEVALPHAARG_ZERO) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); - else if (ac.a == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); - else if (ac.b == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); - else - WRITE(p, "lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); - - WRITE(p, "%s",tevBiasTable[ac.bias]); - - if (ac.shift>0) - WRITE(p, ")"); - - } - else - { - //compare alpha combiner goes here - int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here - WRITE(p, TEVCMPAlphaOPTable[cmp], - tevAInputTable[ac.d], - tevAInputTable[ac.a + 8], - tevAInputTable[ac.b + 8], - tevAInputTable[ac.c + 8]); - } - if (ac.clamp) - WRITE(p, ")"); - WRITE(p, ";\n\n"); - WRITE(p, "// TEV done\n"); -} - -void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) -{ - if (ApiType == API_D3D11) - WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); - else - WRITE(p, "%s=tex2D(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap); -} - -static const char *tevAlphaFuncsTable[] = -{ - "(false)", //ALPHACMP_NEVER 0 - "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1 - "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2 - "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3 - "(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4 - "(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5 - "(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6 - "(true)" //ALPHACMP_ALWAYS 7 -}; - -static const char *tevAlphaFunclogicTable[] = -{ - " && ", // and - " || ", // or - " != ", // xor - " == " // xnor -}; -static int AlphaPreTest() -{ - u32 op = bpmem.alphaFunc.logic; - u32 comp[2] = {bpmem.alphaFunc.comp0, bpmem.alphaFunc.comp1}; - - // First kill all the simple cases - switch(op) - { - case 0: // AND - if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true; - if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) return false; - break; - case 1: // OR - if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true; - if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)return false; - break; - case 2: // XOR - if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS)) - return true; - if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)) - return false; - break; - case 3: // XNOR - if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS)) - return false; - if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)) - return true; - break; - default: PanicAlert("bad logic for alpha test? %08x", op); - } - return -1; -} - - -static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode) -{ - static const char *alphaRef[2] = - { - I_ALPHA"[0].r", - I_ALPHA"[0].g" - }; - - int Pretest = AlphaPreTest(); - if(Pretest >= 0) - { - return Pretest != 0; - } - - // using discard then return works the same in cg and dx9 but not in dx11 - WRITE(p, "if(!( "); - - int compindex = bpmem.alphaFunc.comp0 % 8; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table - - WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);//lookup the logic op - - compindex = bpmem.alphaFunc.comp1 % 8; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table - WRITE(p, ")){ocol0 = 0;%s%s discard;%s}\n", - dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "ocol1 = 0;" : "", - DepthTextureEnable ? "depth = 1.f;" : "", - (ApiType != API_D3D11) ? "return;" : ""); - return true; -} - -static const char *tevFogFuncsTable[] = -{ - "", //No Fog - "", //? - "", //Linear - "", //? - " fog = 1.0f - pow(2.0f, -8.0f * fog);\n", //exp - " fog = 1.0f - pow(2.0f, -8.0f * fog * fog);\n", //exp2 - " fog = pow(2.0f, -8.0f * (1.0f - fog));\n", //backward exp - " fog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" //backward exp2 -}; - -static void WriteFog(char *&p) -{ - if(bpmem.fog.c_proj_fsel.fsel == 0)return;//no Fog - - if (bpmem.fog.c_proj_fsel.proj == 0) - { - // perspective - // ze = A/(B - (Zs >> B_SHF) - WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - (zCoord / "I_FOG"[1].w));\n"); - } - else - { - // orthographic - // ze = a*Zs (here, no B_SHF) - WRITE (p, " float ze = "I_FOG"[1].x * zCoord;\n"); - } - - // x_adjust = sqrt((x-center)^2 + k^2)/k - // ze *= x_adjust - //this is complitly teorical as the real hard seems to use a table intead of calculate the values. - if(bpmem.fogRange.Base.Enabled) - { - WRITE (p, " float x_adjust = (2.0f * (clipPos.x / "I_FOG"[2].y)) - 1.0f - "I_FOG"[2].x;\n"); - WRITE (p, " x_adjust = sqrt(x_adjust * x_adjust + "I_FOG"[2].z * "I_FOG"[2].z) / "I_FOG"[2].z;\n"); - WRITE (p, " ze *= x_adjust;\n"); - } - - WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n"); - - if(bpmem.fog.c_proj_fsel.fsel > 3) - { - WRITE(p, "%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); - } - else - { - if(bpmem.fog.c_proj_fsel.fsel != 2) - WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); - } - - WRITE(p, " prev.rgb = lerp(prev.rgb,"I_FOG"[0].rgb,fog);\n"); - - +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include +#include +#include +#include + +#include "LightingShaderGen.h" +#include "PixelShaderGen.h" +#include "XFMemory.h" // for texture projection mode +#include "BPMemory.h" +#include "VideoConfig.h" +#include "NativeVertexFormat.h" + +PIXELSHADERUID last_pixel_shader_uid; + +static int AlphaPreTest(); + +static void StageHash(int stage, u32* out) +{ + out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24 + u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now + out[0] |= (alphaC&0xF0) << 24; // 8 + out[1] |= alphaC >> 8; // 16 + + // reserve 3 bits for bpmem.tevorders[stage/2].getTexMap + out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3 + out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1 + // reserve 3 bits for bpmem.tevorders[stage/2].getColorChan + + bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages; + out[2] |= bHasIndStage << 2; // 1 + + bool needstexcoord = false; + + if (bHasIndStage) + { + out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation + needstexcoord = true; + } + + + TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC; + TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC; + + if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC + || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC + || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC + || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC + || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA + || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) + { + out[0] |= bpmem.combiners[stage].alphaC.rswap; + out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2 + out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2 + out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2 + out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2 + out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23; + out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1; + } + + out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1); + if (bpmem.tevorders[stage/2].getEnable(stage&1)) + { + if (bHasIndStage) needstexcoord = true; + + out[0] |= bpmem.combiners[stage].alphaC.tswap; + out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2 + out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2 + out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2 + out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2 + out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16; + } + + if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST + || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) + { + out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5 + out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5 + } + + if (needstexcoord) + { + out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16; + } +} + +// Mash together all the inputs that contribute to the code of a generated pixel shader into +// a unique identifier, basically containing all the bits. Yup, it's a lot .... +// It would likely be a lot more efficient to build this incrementally as the attributes +// are set... +void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode) +{ + uid->values[0] |= bpmem.genMode.numtevstages; // 4 + uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 + uid->values[0] |= dstAlphaMode << 8; // 2 + + bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth; + + uid->values[0] |= DepthTextureEnable << 10; // 1 + + bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; + uid->values[0] |= enablePL << 11; // 1 + + if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4 + u32 alphaPreTest = AlphaPreTest()+1; + + uid->values[0] |= alphaPreTest << 16; // 2 + + if (alphaPreTest == 1 || (alphaPreTest && !DepthTextureEnable && dstAlphaMode == DSTALPHA_ALPHA_PASS)) + { + // Courtesy of PreAlphaTest, we're done already ;) + // NOTE: The comment header of generated shaders depends on the value of bpmem.genmode.numindstages.. shouldnt really bother about that though. + uid->num_values = 1; + return; + } + + for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i) + { + if (18+i < 32) + uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1 + else + uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1 + } + + uid->values[1] = bpmem.genMode.numindstages << 2; // 3 + u32 indirectStagesUsed = 0; + for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + indirectStagesUsed |= (1 << bpmem.tevind[i].bt); + + assert(indirectStagesUsed == (indirectStagesUsed & 0xF)); + + uid->values[1] |= indirectStagesUsed << 5; // 4; + + for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) + { + if (indirectStagesUsed & (1 << i)) + { + uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (9 + 3*i); // 1 + if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) + uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (10 + 3*i); // 2 + } + } + + u32* ptr = &uid->values[2]; + for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) + { + StageHash(i, ptr); + ptr += 4; // max: ptr = &uid->values[66] + } + + ptr[0] |= bpmem.alphaFunc.comp0; // 3 + ptr[0] |= bpmem.alphaFunc.comp1 << 3; // 3 + ptr[0] |= bpmem.alphaFunc.logic << 6; // 2 + + if (alphaPreTest == 0 || alphaPreTest == 2) + { + ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 8; // 3 + if (DepthTextureEnable) + { + ptr[0] |= bpmem.ztex2.op << 11; // 2 + ptr[0] |= bpmem.zcontrol.zcomploc << 13; // 1 + ptr[0] |= bpmem.zmode.testenable << 14; // 1 + ptr[0] |= bpmem.zmode.updateenable << 15; // 1 + } + } + + if (dstAlphaMode != DSTALPHA_ALPHA_PASS) + { + if (bpmem.fog.c_proj_fsel.fsel != 0) + { + ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1 + ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1 + } + } + + ++ptr; + if (enablePL) + ptr += GetLightingShaderId(ptr); + + uid->num_values = ptr - uid->values; +} + +void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode) +{ + u32* ptr = uid->values; + *ptr++ = dstAlphaMode; // 0 + *ptr++ = bpmem.genMode.hex; // 1 + *ptr++ = bpmem.ztex2.hex; // 2 + *ptr++ = bpmem.zcontrol.hex; // 3 + *ptr++ = bpmem.zmode.hex; // 4 + *ptr++ = g_ActiveConfig.bEnablePerPixelDepth; // 5 + *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6 + *ptr++ = xfregs.numTexGen.hex; // 7 + + if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + { + // TODO: Include register states for lighting shader + *ptr++ = xfregs.color[0].hex; + *ptr++ = xfregs.alpha[0].hex; + *ptr++ = xfregs.color[1].hex; + *ptr++ = xfregs.alpha[1].hex; + } + + for (unsigned int i = 0; i < 8; ++i) + *ptr++ = xfregs.texMtxInfo[i].hex; // 8-15 + + for (unsigned int i = 0; i < 16; ++i) + *ptr++ = bpmem.tevind[i].hex; // 16-31 + + *ptr++ = bpmem.tevindref.hex; // 32 + + for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times + { + *ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i + *ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i + *ptr++ = bpmem.tevind[i].hex; // 35+5*i + *ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i + *ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i + } + + ptr = &uid->values[113]; + + *ptr++ = bpmem.alphaFunc.hex; // 113 + + *ptr++ = bpmem.fog.c_proj_fsel.hex; // 114 + *ptr++ = bpmem.fogRange.Base.hex; // 115 + + _assert_((ptr - uid->values) == uid->GetNumValues()); +} + +void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components) +{ + if (!g_ActiveConfig.bEnableShaderDebugging) + return; + + PIXELSHADERUIDSAFE new_id; + GetSafePixelShaderId(&new_id, dstAlphaMode); + + if (!(old_id == new_id)) + { + std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components)); + if (old_code != new_code) + { + _assert_(old_id.GetNumValues() == new_id.GetNumValues()); + + char msg[8192]; + char* ptr = msg; + ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); + const int N = new_id.GetNumValues(); + for (int i = 0; i < N/2; ++i) + ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], + new_id.values[2*i], new_id.values[2*i+1]); + if (N % 2) + ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); + + static int num_failures = 0; + char szTemp[MAX_PATH]; + sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); + std::ofstream file(szTemp); + file << msg; + file << "\n\nOld shader code:\n" << old_code; + file << "\n\nNew shader code:\n" << new_code; + file.close(); + + PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); + } + } +} + +// old tev->pixelshader notes +// +// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 +// konstant for this stage (alpha, color) is given by bpmem.tevksel +// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current chan color +// according to GXTevColorArg table above +// output is given by .outreg +// tevtemp is set according to swapmodetables and + +static void WriteStage(char *&p, int n, API_TYPE ApiType); +static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); +// static void WriteAlphaCompare(char *&p, int num, int comp); +static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); +static void WriteFog(char *&p); + +static const char *tevKSelTableC[] = // KCSEL +{ + "1.0f,1.0f,1.0f", // 1 = 0x00 + "0.875f,0.875f,0.875f", // 7_8 = 0x01 + "0.75f,0.75f,0.75f", // 3_4 = 0x02 + "0.625f,0.625f,0.625f", // 5_8 = 0x03 + "0.5f,0.5f,0.5f", // 1_2 = 0x04 + "0.375f,0.375f,0.375f", // 3_8 = 0x05 + "0.25f,0.25f,0.25f", // 1_4 = 0x06 + "0.125f,0.125f,0.125f", // 1_8 = 0x07 + "ERROR", // 0x08 + "ERROR", // 0x09 + "ERROR", // 0x0a + "ERROR", // 0x0b + I_KCOLORS"[0].rgb", // K0 = 0x0C + I_KCOLORS"[1].rgb", // K1 = 0x0D + I_KCOLORS"[2].rgb", // K2 = 0x0E + I_KCOLORS"[3].rgb", // K3 = 0x0F + I_KCOLORS"[0].rrr", // K0_R = 0x10 + I_KCOLORS"[1].rrr", // K1_R = 0x11 + I_KCOLORS"[2].rrr", // K2_R = 0x12 + I_KCOLORS"[3].rrr", // K3_R = 0x13 + I_KCOLORS"[0].ggg", // K0_G = 0x14 + I_KCOLORS"[1].ggg", // K1_G = 0x15 + I_KCOLORS"[2].ggg", // K2_G = 0x16 + I_KCOLORS"[3].ggg", // K3_G = 0x17 + I_KCOLORS"[0].bbb", // K0_B = 0x18 + I_KCOLORS"[1].bbb", // K1_B = 0x19 + I_KCOLORS"[2].bbb", // K2_B = 0x1A + I_KCOLORS"[3].bbb", // K3_B = 0x1B + I_KCOLORS"[0].aaa", // K0_A = 0x1C + I_KCOLORS"[1].aaa", // K1_A = 0x1D + I_KCOLORS"[2].aaa", // K2_A = 0x1E + I_KCOLORS"[3].aaa", // K3_A = 0x1F +}; + +static const char *tevKSelTableA[] = // KASEL +{ + "1.0f", // 1 = 0x00 + "0.875f",// 7_8 = 0x01 + "0.75f", // 3_4 = 0x02 + "0.625f",// 5_8 = 0x03 + "0.5f", // 1_2 = 0x04 + "0.375f",// 3_8 = 0x05 + "0.25f", // 1_4 = 0x06 + "0.125f",// 1_8 = 0x07 + "ERROR", // 0x08 + "ERROR", // 0x09 + "ERROR", // 0x0a + "ERROR", // 0x0b + "ERROR", // 0x0c + "ERROR", // 0x0d + "ERROR", // 0x0e + "ERROR", // 0x0f + I_KCOLORS"[0].r", // K0_R = 0x10 + I_KCOLORS"[1].r", // K1_R = 0x11 + I_KCOLORS"[2].r", // K2_R = 0x12 + I_KCOLORS"[3].r", // K3_R = 0x13 + I_KCOLORS"[0].g", // K0_G = 0x14 + I_KCOLORS"[1].g", // K1_G = 0x15 + I_KCOLORS"[2].g", // K2_G = 0x16 + I_KCOLORS"[3].g", // K3_G = 0x17 + I_KCOLORS"[0].b", // K0_B = 0x18 + I_KCOLORS"[1].b", // K1_B = 0x19 + I_KCOLORS"[2].b", // K2_B = 0x1A + I_KCOLORS"[3].b", // K3_B = 0x1B + I_KCOLORS"[0].a", // K0_A = 0x1C + I_KCOLORS"[1].a", // K1_A = 0x1D + I_KCOLORS"[2].a", // K2_A = 0x1E + I_KCOLORS"[3].a", // K3_A = 0x1F +}; + +static const char *tevScaleTable[] = // CS +{ + "1.0f", // SCALE_1 + "2.0f", // SCALE_2 + "4.0f", // SCALE_4 + "0.5f", // DIVIDE_2 +}; + +static const char *tevBiasTable[] = // TB +{ + "", // ZERO, + "+0.5f", // ADDHALF, + "-0.5f", // SUBHALF, + "", +}; + +static const char *tevOpTable[] = { // TEV + "+", // TEVOP_ADD = 0, + "-", // TEVOP_SUB = 1, +}; + +static const char *tevCInputTable[] = // CC +{ + "(prev.rgb)", // CPREV, + "(prev.aaa)", // APREV, + "(c0.rgb)", // C0, + "(c0.aaa)", // A0, + "(c1.rgb)", // C1, + "(c1.aaa)", // A1, + "(c2.rgb)", // C2, + "(c2.aaa)", // A2, + "(textemp.rgb)", // TEXC, + "(textemp.aaa)", // TEXA, + "(rastemp.rgb)", // RASC, + "(rastemp.aaa)", // RASA, + "float3(1.0f, 1.0f, 1.0f)", // ONE + "float3(0.5f, 0.5f, 0.5f)", // HALF + "(konsttemp.rgb)", //"konsttemp.rgb", // KONST + "float3(0.0f, 0.0f, 0.0f)", // ZERO + ///aded extra values to map clamped values + "(cprev.rgb)", // CPREV, + "(cprev.aaa)", // APREV, + "(cc0.rgb)", // C0, + "(cc0.aaa)", // A0, + "(cc1.rgb)", // C1, + "(cc1.aaa)", // A1, + "(cc2.rgb)", // C2, + "(cc2.aaa)", // A2, + "(textemp.rgb)", // TEXC, + "(textemp.aaa)", // TEXA, + "(crastemp.rgb)", // RASC, + "(crastemp.aaa)", // RASA, + "float3(1.0f, 1.0f, 1.0f)", // ONE + "float3(0.5f, 0.5f, 0.5f)", // HALF + "(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST + "float3(0.0f, 0.0f, 0.0f)", // ZERO + "PADERROR", "PADERROR", "PADERROR", "PADERROR" +}; + +static const char *tevAInputTable[] = // CA +{ + "prev", // APREV, + "c0", // A0, + "c1", // A1, + "c2", // A2, + "textemp", // TEXA, + "rastemp", // RASA, + "konsttemp", // KONST, (hw1 had quarter) + "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO + ///aded extra values to map clamped values + "cprev", // APREV, + "cc0", // A0, + "cc1", // A1, + "cc2", // A2, + "textemp", // TEXA, + "crastemp", // RASA, + "ckonsttemp", // KONST, (hw1 had quarter) + "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", +}; + +static const char *tevRasTable[] = +{ + "colors_0", + "colors_1", + "ERROR", //2 + "ERROR", //3 + "ERROR", //4 + "alphabump", // use bump alpha + "(alphabump*(255.0f/248.0f))", //normalized + "float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero +}; + +//static const char *tevTexFunc[] = { "tex2D", "texRECT" }; + +static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; +static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; +static const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; +//static const char *tevIndAlphaScale[] = {"", "*32", "*16", "*8"}; +static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)", "*(240.0f/255.0f)", "*(248.0f/255.0f)"}; +static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias +static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt +static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" }; +static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" }; + +#define WRITE p+=sprintf + +static char swapModeTable[4][5]; + +static char text[16384]; +static bool DepthTextureEnable; + +static void BuildSwapModeTable() +{ + static const char *swapColors = "rgba"; + for (int i = 0; i < 4; i++) + { + swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1]; + swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2]; + swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1]; + swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2]; + swapModeTable[i][4] = 0; + } +} + +const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +{ + setlocale(LC_NUMERIC, "C"); // Reset locale for compilation + text[sizeof(text) - 1] = 0x7C; // canary + + BuildSwapModeTable(); // Needed for WriteStage + int numStages = bpmem.genMode.numtevstages + 1; + int numTexgen = bpmem.genMode.numtexgens; + + char *p = text; + WRITE(p, "//Pixel Shader for TEV stages\n"); + WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n", + numStages, numTexgen/*, bpmem.genMode.numindstages*/); + + int nIndirectStagesUsed = 0; + if (bpmem.genMode.numindstages > 0) + { + for (int i = 0; i < numStages; ++i) + { + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ; + // Declare samplers + + if(ApiType != API_D3D11) + { + WRITE(p, "uniform sampler2D "); + } + else + { + WRITE(p, "sampler "); + } + + bool bfirst = true; + for (int i = 0; i < 8; ++i) + { + WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",", i, i); + bfirst = false; + } + WRITE(p, ";\n"); + if(ApiType == API_D3D11) + { + WRITE(p, "Texture2D "); + bfirst = true; + for (int i = 0; i < 8; ++i) + { + WRITE(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i); + bfirst = false; + } + WRITE(p, ";\n"); + } + + WRITE(p, "\n"); + + WRITE(p, "uniform float4 "I_COLORS"[4] : register(c%d);\n", C_COLORS); + WRITE(p, "uniform float4 "I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); + WRITE(p, "uniform float4 "I_ALPHA"[1] : register(c%d);\n", C_ALPHA); + WRITE(p, "uniform float4 "I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); + WRITE(p, "uniform float4 "I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); + WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); + WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); + WRITE(p, "uniform float4 "I_FOG"[3] : register(c%d);\n", C_FOG); + + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + { + WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n"); + WRITE(p,"typedef struct { Light lights[8]; } s_"I_PLIGHTS";\n"); + WRITE(p, "uniform s_"I_PLIGHTS" "I_PLIGHTS" : register(c%d);\n", C_PLIGHTS); + WRITE(p, "typedef struct { float4 C0, C1, C2, C3; } s_"I_PMATERIALS";\n"); + WRITE(p, "uniform s_"I_PMATERIALS" "I_PMATERIALS" : register(c%d);\n", C_PMATERIALS); + } + + WRITE(p, "void main(\n"); + if(ApiType != API_D3D11) + { + WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", + dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "", + DepthTextureEnable ? "\n out float depth : DEPTH," : "", + ApiType & API_OPENGL ? "WPOS" : ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS"); + } + else + { + WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", + dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", + DepthTextureEnable ? "\n out float depth : SV_Depth," : ""); + } + + WRITE(p, " in float4 colors_0 : COLOR0,\n"); + WRITE(p, " in float4 colors_1 : COLOR1"); + + // compute window position if needed because binding semantic WPOS is not widely supported + if (numTexgen < 7) + { + for (int i = 0; i < numTexgen; ++i) + WRITE(p, ",\n in float3 uv%d : TEXCOORD%d", i, i); + WRITE(p, ",\n in float4 clipPos : TEXCOORD%d", numTexgen); + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + WRITE(p, ",\n in float4 Normal : TEXCOORD%d", numTexgen + 1); + } + else + { + // wpos is in w of first 4 texcoords + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + { + for (int i = 0; i < 8; ++i) + WRITE(p, ",\n in float4 uv%d : TEXCOORD%d", i, i); + } + else + { + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + WRITE(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); + } + } + WRITE(p, " ) {\n"); + + char* pmainstart = p; + int Pretest = AlphaPreTest(); + if (dstAlphaMode == DSTALPHA_ALPHA_PASS && !DepthTextureEnable && Pretest >= 0) + { + if (!Pretest) + { + // alpha test will always fail, so restart the shader and just make it an empty function + WRITE(p, "ocol0 = 0;\n"); + WRITE(p, "discard;\n"); + if(ApiType != API_D3D11) + WRITE(p, "return;\n"); + } + else + { + WRITE(p, " ocol0 = "I_ALPHA"[0].aaaa;\n"); + } + WRITE(p, "}\n"); + return text; + } + + WRITE(p, " float4 c0 = "I_COLORS"[1], c1 = "I_COLORS"[2], c2 = "I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" + " float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n" + " float4 alphabump=float4(0.0f,0.0f,0.0f,0.0f);\n" + " float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n" + " float2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n" + " float4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n" + " float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n" + " float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n"); + + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + { + if (xfregs.numTexGen.numTexGens < 7) + { + WRITE(p,"float3 _norm0 = normalize(Normal.xyz);\n\n"); + WRITE(p,"float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); + } + else + { + WRITE(p," float3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); + WRITE(p,"float3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); + } + + + WRITE(p, "float4 mat, lacc;\n" + "float3 ldir, h;\n" + "float dist, dist2, attn;\n"); + + p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); + } + + if (numTexgen < 7) + WRITE(p, "clipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); + else + WRITE(p, "float4 clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); + + // HACK to handle cases where the tex gen is not enabled + if (numTexgen == 0) + { + WRITE(p, "float3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); + } + else + { + for (int i = 0; i < numTexgen; ++i) + { + // optional perspective divides + if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) + { + WRITE(p, "if (uv%d.z)", i); + WRITE(p, " uv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); + } + + WRITE(p, "uv%d.xy = uv%d.xy * "I_TEXDIMS"[%d].zw;\n", i, i, i); + } + } + + // indirect texture map lookup + for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) + { + if (nIndirectStagesUsed & (1<= %s.r + (0.25f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_GT 8 + " %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_EQ 9 + " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_GT 10 + " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_EQ 11 + " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_GT 12 + " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_EQ 13 + " %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)), float3(0.0f, 0.0f, 0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14 + " %s + ((float3(1.0f, 1.0f, 1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)), float3(0.0f, 0.0f, 0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15 +}; + +//table with the alpha compare operations +static const char *TEVCMPAlphaOPTable[16] = +{ + "0.0f",//0 + "0.0f",//1 + "0.0f",//2 + "0.0f",//3 + "0.0f",//4 + "0.0f",//5 + "0.0f",//6 + "0.0f",//7 + " %s.a + ((%s.r >= (%s.r + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_R8_GT 8 + " %s.a + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_R8_EQ 9 + " %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_GR16_GT 10 + " %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_GR16_EQ 11 + " %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_GT 12 + " %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_EQ 13 + " %s.a + ((%s.a >= (%s.a + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_A8_GT 14 + " %s.a + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s.a : 0.0f)"//#define TEVCMP_A8_EQ 15 + +}; + + +static void WriteStage(char *&p, int n, API_TYPE ApiType) +{ + int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); + bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; + bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages; + + // HACK to handle cases where the tex gen is not enabled + if (!bHasTexCoord) + texcoord = 0; + + WRITE(p, "// TEV stage %d\n", n); + + if (bHasIndStage) + { + WRITE(p, "// indirect op\n"); + // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords + if (bpmem.tevind[n].bs != ITBA_OFF) + { + WRITE(p, "alphabump = indtex%d.%s %s;\n", + bpmem.tevind[n].bt, + tevIndAlphaSel[bpmem.tevind[n].bs], + tevIndAlphaScale[bpmem.tevind[n].fmt]); + } + // format + WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); + + // bias + if (bpmem.tevind[n].bias != ITB_NONE ) + WRITE(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + + // multiply by offset matrix and scale + if (bpmem.tevind[n].mid != 0) + { + if (bpmem.tevind[n].mid <= 3) + { + int mtxidx = 2*(bpmem.tevind[n].mid-1); + WRITE(p, "float2 indtevtrans%d = float2(dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", + n, mtxidx, n, mtxidx+1, n); + } + else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) + { // s matrix + // TODO: Might become negative? + int mtxidx = 2*(bpmem.tevind[n].mid-5); + WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); + } + else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) + { // t matrix + // TODO: Might become negative? + int mtxidx = 2*(bpmem.tevind[n].mid-9); + WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); + } + else + WRITE(p, "float2 indtevtrans%d = 0;\n", n); + } + else + WRITE(p, "float2 indtevtrans%d = 0;\n", n); + + // --------- + // Wrapping + // --------- + + // wrap S + if (bpmem.tevind[n].sw == ITW_OFF) + WRITE(p, "wrappedcoord.x = uv%d.x;\n", texcoord); + else if (bpmem.tevind[n].sw == ITW_0) + WRITE(p, "wrappedcoord.x = 0.0f;\n"); + else + WRITE(p, "wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + + // wrap T + if (bpmem.tevind[n].tw == ITW_OFF) + WRITE(p, "wrappedcoord.y = uv%d.y;\n", texcoord); + else if (bpmem.tevind[n].tw == ITW_0) + WRITE(p, "wrappedcoord.y = 0.0f;\n"); + else + WRITE(p, "wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + + if (bpmem.tevind[n].fb_addprev) // add previous tevcoord + WRITE(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); + else + WRITE(p, "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); + } + + TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; + TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; + + // blah1 + if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC + || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC + || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC + || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC + || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA + || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) + { + char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; + WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); + WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + } + + + if (bpmem.tevorders[n/2].getEnable(n&1)) + { + if(!bHasIndStage) + { + // calc tevcord + if(bHasTexCoord) + WRITE(p, "tevcoord.xy = uv%d.xy;\n", texcoord); + else + WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n"); + } + + char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; + int texmap = bpmem.tevorders[n/2].getTexMap(n&1); + SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType); + } + else + WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + + + // blah2 + if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST + || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) + { + int kc = bpmem.tevksel[n / 2].getKC(n & 1); + int ka = bpmem.tevksel[n / 2].getKA(n & 1); + WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + if(kc > 7 || ka > 7) + { + WRITE(p, "ckonsttemp = frac(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + } + else + { + WRITE(p, "ckonsttemp = konsttemp;\n"); + } + } + + if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV + || cc.b == TEVCOLORARG_CPREV || cc.b == TEVCOLORARG_APREV + || cc.c == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_APREV + || ac.a == TEVALPHAARG_APREV || ac.b == TEVALPHAARG_APREV || ac.c == TEVALPHAARG_APREV) + WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + + + if(cc.a == TEVCOLORARG_C0 || cc.a == TEVCOLORARG_A0 + || cc.b == TEVCOLORARG_C0 || cc.b == TEVCOLORARG_A0 + || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0 + || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0) + WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + + + if(cc.a == TEVCOLORARG_C1 || cc.a == TEVCOLORARG_A1 + || cc.b == TEVCOLORARG_C1 || cc.b == TEVCOLORARG_A1 + || cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1 + || ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1) + WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + + + if(cc.a == TEVCOLORARG_C2 || cc.a == TEVCOLORARG_A2 + || cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_A2 + || cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2 + || ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2) + WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + + + WRITE(p, "// color combine\n"); + if (cc.clamp) + WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]); + else + WRITE(p, "%s = ", tevCOutputTable[cc.dest]); + + // combine the color channel + if (cc.bias != TevBias_COMPARE) // if not compare + { + //normal color combiner goes here + if (cc.shift > TEVSCALE_1) + WRITE(p, "%s*(", tevScaleTable[cc.shift]); + + if(!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) + WRITE(p, "%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); + + if (cc.a == cc.b) + WRITE(p, "%s", tevCInputTable[cc.a + 16]); + else if (cc.c == TEVCOLORARG_ZERO) + WRITE(p, "%s", tevCInputTable[cc.a + 16]); + else if (cc.c == TEVCOLORARG_ONE) + WRITE(p, "%s", tevCInputTable[cc.b + 16]); + else if (cc.a == TEVCOLORARG_ZERO) + WRITE(p, "%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + else if (cc.b == TEVCOLORARG_ZERO) + WRITE(p, "%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); + else + WRITE(p, "lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + + WRITE(p, "%s", tevBiasTable[cc.bias]); + + if (cc.shift > TEVSCALE_1) + WRITE(p, ")"); + } + else + { + int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here + WRITE(p, TEVCMPColorOPTable[cmp],//lookup the function from the op table + tevCInputTable[cc.d], + tevCInputTable[cc.a + 16], + tevCInputTable[cc.b + 16], + tevCInputTable[cc.c + 16]); + } + if (cc.clamp) + WRITE(p, ")"); + WRITE(p,";\n"); + + WRITE(p, "// alpha combine\n"); + // combine the alpha channel + if (ac.clamp) + WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]); + else + WRITE(p, "%s = ", tevAOutputTable[ac.dest]); + + if (ac.bias != TevBias_COMPARE) // if not compare + { + //normal alpha combiner goes here + if (ac.shift > TEVSCALE_1) + WRITE(p, "%s*(", tevScaleTable[ac.shift]); + + if(!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) + WRITE(p, "%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); + + if (ac.a == ac.b) + WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + else if (ac.c == TEVALPHAARG_ZERO) + WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + else if (ac.a == TEVALPHAARG_ZERO) + WRITE(p, "%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + else if (ac.b == TEVALPHAARG_ZERO) + WRITE(p, "%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); + else + WRITE(p, "lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + + WRITE(p, "%s",tevBiasTable[ac.bias]); + + if (ac.shift>0) + WRITE(p, ")"); + + } + else + { + //compare alpha combiner goes here + int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here + WRITE(p, TEVCMPAlphaOPTable[cmp], + tevAInputTable[ac.d], + tevAInputTable[ac.a + 8], + tevAInputTable[ac.b + 8], + tevAInputTable[ac.c + 8]); + } + if (ac.clamp) + WRITE(p, ")"); + WRITE(p, ";\n\n"); + WRITE(p, "// TEV done\n"); +} + +void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) +{ + if (ApiType == API_D3D11) + WRITE(p, "%s=Tex%d.Sample(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); + else + WRITE(p, "%s=tex2D(samp%d,%s.xy * "I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap); +} + +static const char *tevAlphaFuncsTable[] = +{ + "(false)", //ALPHACMP_NEVER 0 + "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1 + "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2 + "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3 + "(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4 + "(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5 + "(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6 + "(true)" //ALPHACMP_ALWAYS 7 +}; + +static const char *tevAlphaFunclogicTable[] = +{ + " && ", // and + " || ", // or + " != ", // xor + " == " // xnor +}; +static int AlphaPreTest() +{ + u32 op = bpmem.alphaFunc.logic; + u32 comp[2] = {bpmem.alphaFunc.comp0, bpmem.alphaFunc.comp1}; + + // First kill all the simple cases + switch(op) + { + case 0: // AND + if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true; + if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) return false; + break; + case 1: // OR + if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true; + if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)return false; + break; + case 2: // XOR + if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS)) + return true; + if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)) + return false; + break; + case 3: // XNOR + if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS)) + return false; + if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)) + return true; + break; + default: PanicAlert("bad logic for alpha test? %08x", op); + } + return -1; +} + + +static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode) +{ + static const char *alphaRef[2] = + { + I_ALPHA"[0].r", + I_ALPHA"[0].g" + }; + + int Pretest = AlphaPreTest(); + if(Pretest >= 0) + { + return Pretest != 0; + } + + // using discard then return works the same in cg and dx9 but not in dx11 + WRITE(p, "if(!( "); + + int compindex = bpmem.alphaFunc.comp0 % 8; + WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table + + WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);//lookup the logic op + + compindex = bpmem.alphaFunc.comp1 % 8; + WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table + WRITE(p, ")){ocol0 = 0;%s%s discard;%s}\n", + dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "ocol1 = 0;" : "", + DepthTextureEnable ? "depth = 1.f;" : "", + (ApiType != API_D3D11) ? "return;" : ""); + return true; +} + +static const char *tevFogFuncsTable[] = +{ + "", //No Fog + "", //? + "", //Linear + "", //? + " fog = 1.0f - pow(2.0f, -8.0f * fog);\n", //exp + " fog = 1.0f - pow(2.0f, -8.0f * fog * fog);\n", //exp2 + " fog = pow(2.0f, -8.0f * (1.0f - fog));\n", //backward exp + " fog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" //backward exp2 +}; + +static void WriteFog(char *&p) +{ + if(bpmem.fog.c_proj_fsel.fsel == 0)return;//no Fog + + if (bpmem.fog.c_proj_fsel.proj == 0) + { + // perspective + // ze = A/(B - (Zs >> B_SHF) + WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - (zCoord / "I_FOG"[1].w));\n"); + } + else + { + // orthographic + // ze = a*Zs (here, no B_SHF) + WRITE (p, " float ze = "I_FOG"[1].x * zCoord;\n"); + } + + // x_adjust = sqrt((x-center)^2 + k^2)/k + // ze *= x_adjust + //this is complitly teorical as the real hard seems to use a table intead of calculate the values. + if(bpmem.fogRange.Base.Enabled) + { + WRITE (p, " float x_adjust = (2.0f * (clipPos.x / "I_FOG"[2].y)) - 1.0f - "I_FOG"[2].x;\n"); + WRITE (p, " x_adjust = sqrt(x_adjust * x_adjust + "I_FOG"[2].z * "I_FOG"[2].z) / "I_FOG"[2].z;\n"); + WRITE (p, " ze *= x_adjust;\n"); + } + + WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n"); + + if(bpmem.fog.c_proj_fsel.fsel > 3) + { + WRITE(p, "%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); + } + else + { + if(bpmem.fog.c_proj_fsel.fsel != 2) + WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); + } + + WRITE(p, " prev.rgb = lerp(prev.rgb,"I_FOG"[0].rgb,fog);\n"); + + } \ No newline at end of file diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index a492d5a201..8400fde9a5 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -1,503 +1,503 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include -#include - -#include "NativeVertexFormat.h" - -#include "BPMemory.h" -#include "CPMemory.h" -#include "LightingShaderGen.h" -#include "VertexShaderGen.h" -#include "VideoConfig.h" - -VERTEXSHADERUID last_vertex_shader_uid; - -// Mash together all the inputs that contribute to the code of a generated vertex shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... -void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) -{ - uid->values[0] = components | - (xfregs.numTexGen.numTexGens << 23) | - (xfregs.numChan.numColorChans << 27) | - (xfregs.dualTexTrans.enabled << 29); - - // TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here? - GetLightingShaderId(&uid->values[1]); - - uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31; - u32 *pcurvalue = &uid->values[3]; - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - TexMtxInfo tinfo = xfregs.texMtxInfo[i]; - if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) - tinfo.hex &= 0x7ff; - if (tinfo.texgentype != XF_TEXGEN_REGULAR) - tinfo.projection = 0; - - u32 val = ((tinfo.hex >> 1) & 0x1ffff); - if (xfregs.dualTexTrans.enabled && tinfo.texgentype == XF_TEXGEN_REGULAR) { - // rewrite normalization and post index - val |= ((u32)xfregs.postMtxInfo[i].index << 17) | ((u32)xfregs.postMtxInfo[i].normalize << 23); - } - - switch (i & 3) { - case 0: pcurvalue[0] |= val; break; - case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break; - case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break; - case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break; - } - } -} - -void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components) -{ - // Just store all used registers here without caring whether we need all bits or less. - u32* ptr = uid->values; - *ptr++ = components; - *ptr++ = xfregs.numTexGen.hex; - *ptr++ = xfregs.numChan.hex; - *ptr++ = xfregs.dualTexTrans.hex; - - for (int i = 0; i < 2; ++i) { - *ptr++ = xfregs.color[i].hex; - *ptr++ = xfregs.alpha[i].hex; - } - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - for (unsigned int i = 0; i < 8; ++i) { - *ptr++ = xfregs.texMtxInfo[i].hex; - *ptr++ = xfregs.postMtxInfo[i].hex; - } - _assert_((ptr - uid->values) == uid->GetNumValues()); -} - - -void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components) -{ - if (!g_ActiveConfig.bEnableShaderDebugging) - return; - - VERTEXSHADERUIDSAFE new_id; - GetSafeVertexShaderId(&new_id, components); - - if (!(old_id == new_id)) - { - std::string new_code(GenerateVertexShaderCode(components, api)); - if (old_code != new_code) - { - _assert_(old_id.GetNumValues() == new_id.GetNumValues()); - - char msg[8192]; - char* ptr = msg; - ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); - const int N = new_id.GetNumValues(); - for (int i = 0; i < N/2; ++i) - ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], - new_id.values[2*i], new_id.values[2*i+1]); - if (N % 2) - ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); - - static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file(szTemp); - file << msg; - file << "\n\nOld shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code; - file.close(); - - PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); - } - } -} - - -static char text[16384]; - -#define WRITE p+=sprintf - -char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type) -{ - WRITE(p, "struct VS_OUTPUT {\n"); - WRITE(p, " float4 pos : POSITION;\n"); - WRITE(p, " float4 colors_0 : COLOR0;\n"); - WRITE(p, " float4 colors_1 : COLOR1;\n"); - - if (xfregs.numTexGen.numTexGens < 7) { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, " float3 tex%d : TEXCOORD%d;\n", i, i); - WRITE(p, " float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens); - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, " float4 Normal : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens + 1); - } else { - // clip position is in w of first 4 texcoords - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - for (int i = 0; i < 8; ++i) - WRITE(p, " float4 tex%d : TEXCOORD%d;\n", i, i); - } - else - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, " float%d tex%d : TEXCOORD%d;\n", i < 4 ? 4 : 3 , i, i); - } - } - WRITE(p, "};\n"); - - return p; -} - -const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type) -{ - setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - text[sizeof(text) - 1] = 0x7C; // canary - - _assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens); - _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); - - bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); - u32 lightMask = 0; - if (xfregs.numChan.numColorChans > 0) - lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask(); - if (xfregs.numChan.numColorChans > 1) - lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); - - char *p = text; - WRITE(p, "//Vertex Shader: comp:%x, \n", components); - WRITE(p, "typedef struct { float4 T0, T1, T2; float4 N0, N1, N2; } s_"I_POSNORMALMATRIX";\n" - "typedef struct { float4 t; } FLT4;\n" - "typedef struct { FLT4 T[24]; } s_"I_TEXMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_"I_TRANSFORMMATRICES";\n" - "typedef struct { FLT4 T[32]; } s_"I_NORMALMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_"I_POSTTRANSFORMMATRICES";\n" - "typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n" - "typedef struct { Light lights[8]; } s_"I_LIGHTS";\n" - "typedef struct { float4 C0, C1, C2, C3; } s_"I_MATERIALS";\n" - "typedef struct { float4 T0, T1, T2, T3; } s_"I_PROJECTION";\n" - ); - - p = GenerateVSOutputStruct(p, components, api_type); - - // uniforms - - WRITE(p, "uniform s_"I_TRANSFORMMATRICES" "I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES); - WRITE(p, "uniform s_"I_TEXMATRICES" "I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); // also using tex matrices - WRITE(p, "uniform s_"I_NORMALMATRICES" "I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES); - WRITE(p, "uniform s_"I_POSNORMALMATRIX" "I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX); - WRITE(p, "uniform s_"I_POSTTRANSFORMMATRICES" "I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES); - WRITE(p, "uniform s_"I_LIGHTS" "I_LIGHTS" : register(c%d);\n", C_LIGHTS); - WRITE(p, "uniform s_"I_MATERIALS" "I_MATERIALS" : register(c%d);\n", C_MATERIALS); - WRITE(p, "uniform s_"I_PROJECTION" "I_PROJECTION" : register(c%d);\n", C_PROJECTION); - WRITE(p, "uniform float4 "I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS); - - WRITE(p, "VS_OUTPUT main(\n"); - - // inputs - if (components & VB_HAS_NRM0) - WRITE(p, " float3 rawnorm0 : NORMAL0,\n"); - if (components & VB_HAS_NRM1) { - if (is_d3d) - WRITE(p, " float3 rawnorm1 : NORMAL1,\n"); - else - WRITE(p, " float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); - } - if (components & VB_HAS_NRM2) { - if (is_d3d) - WRITE(p, " float3 rawnorm2 : NORMAL2,\n"); - else - WRITE(p, " float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); - } - if (components & VB_HAS_COL0) - WRITE(p, " float4 color0 : COLOR0,\n"); - if (components & VB_HAS_COL1) - WRITE(p, " float4 color1 : COLOR1,\n"); - for (int i = 0; i < 8; ++i) { - u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<= 32 ? (posmtx-32) : posmtx;\n"); - WRITE(p, "float3 N0 = "I_NORMALMATRICES".T[normidx].t.xyz, N1 = "I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = "I_NORMALMATRICES".T[normidx+2].t.xyz;\n"); - } - - if (components & VB_HAS_NRM0) - WRITE(p, "float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); - if (components & VB_HAS_NRM1) - WRITE(p, "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); - if (components & VB_HAS_NRM2) - WRITE(p, "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); - } - else - { - WRITE(p, "float4 pos = float4(dot("I_POSNORMALMATRIX".T0, rawpos), dot("I_POSNORMALMATRIX".T1, rawpos), dot("I_POSNORMALMATRIX".T2, rawpos), 1.0f);\n"); - if (components & VB_HAS_NRM0) - WRITE(p, "float3 _norm0 = normalize(float3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm0)));\n"); - if (components & VB_HAS_NRM1) - WRITE(p, "float3 _norm1 = float3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n"); - if (components & VB_HAS_NRM2) - WRITE(p, "float3 _norm2 = float3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n"); - } - - if (!(components & VB_HAS_NRM0)) - WRITE(p, "float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); - - - - WRITE(p, "o.pos = float4(dot("I_PROJECTION".T0, pos), dot("I_PROJECTION".T1, pos), dot("I_PROJECTION".T2, pos), dot("I_PROJECTION".T3, pos));\n"); - - WRITE(p, "float4 mat, lacc;\n" - "float3 ldir, h;\n" - "float dist, dist2, attn;\n"); - - if(xfregs.numChan.numColorChans == 0) - { - if (components & VB_HAS_COL0) - WRITE(p, "o.colors_0 = color0;\n"); - else - WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - - // TODO: This probably isn't necessary if pixel lighting is enabled. - p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); - - if(xfregs.numChan.numColorChans < 2) - { - if (components & VB_HAS_COL1) - WRITE(p, "o.colors_1 = color1;\n"); - else - WRITE(p, "o.colors_1 = o.colors_0;\n"); - } - // special case if only pos and tex coord 0 and tex coord input is AB11 - // donko - this has caused problems in some games. removed for now. - bool texGenSpecialCase = false; - /*bool texGenSpecialCase = - ((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0 - (g_VtxDesc.Tex0Coord != NOT_PRESENT) && - (xfregs.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); - */ - - // transform texcoords - WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; - - WRITE(p, "{\n"); - WRITE(p, "coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - switch (texinfo.sourcerow) { - case XF_SRCGEOM_INROW: - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = rawpos;\n"); // pos.w is 1 - break; - case XF_SRCNORMAL_INROW: - if (components & VB_HAS_NRM0) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n"); - } - break; - case XF_SRCCOLORS_INROW: - _assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 ); - break; - case XF_SRCBINORMAL_T_INROW: - if (components & VB_HAS_NRM1) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n"); - } - break; - case XF_SRCBINORMAL_B_INROW: - if (components & VB_HAS_NRM2) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n"); - } - break; - default: - _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); - if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) - WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); - break; - } - - // first transformation - switch (texinfo.texgentype) { - case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map - - if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { - // transform the light dir into tangent space - WRITE(p, "ldir = normalize("I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); - WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); - } - else - { - _assert_(0); // should have normals - WRITE(p, "o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); - } - - break; - case XF_TEXGEN_COLOR_STRGBC0: - _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - WRITE(p, "o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); - break; - case XF_TEXGEN_COLOR_STRGBC1: - _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - WRITE(p, "o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); - break; - case XF_TEXGEN_REGULAR: - default: - if (components & (VB_HAS_TEXMTXIDX0< +#include + +#include "NativeVertexFormat.h" + +#include "BPMemory.h" +#include "CPMemory.h" +#include "LightingShaderGen.h" +#include "VertexShaderGen.h" +#include "VideoConfig.h" + +VERTEXSHADERUID last_vertex_shader_uid; + +// Mash together all the inputs that contribute to the code of a generated vertex shader into +// a unique identifier, basically containing all the bits. Yup, it's a lot .... +void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) +{ + uid->values[0] = components | + (xfregs.numTexGen.numTexGens << 23) | + (xfregs.numChan.numColorChans << 27) | + (xfregs.dualTexTrans.enabled << 29); + + // TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here? + GetLightingShaderId(&uid->values[1]); + + uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31; + u32 *pcurvalue = &uid->values[3]; + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { + TexMtxInfo tinfo = xfregs.texMtxInfo[i]; + if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) + tinfo.hex &= 0x7ff; + if (tinfo.texgentype != XF_TEXGEN_REGULAR) + tinfo.projection = 0; + + u32 val = ((tinfo.hex >> 1) & 0x1ffff); + if (xfregs.dualTexTrans.enabled && tinfo.texgentype == XF_TEXGEN_REGULAR) { + // rewrite normalization and post index + val |= ((u32)xfregs.postMtxInfo[i].index << 17) | ((u32)xfregs.postMtxInfo[i].normalize << 23); + } + + switch (i & 3) { + case 0: pcurvalue[0] |= val; break; + case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break; + case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break; + case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break; + } + } +} + +void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components) +{ + // Just store all used registers here without caring whether we need all bits or less. + u32* ptr = uid->values; + *ptr++ = components; + *ptr++ = xfregs.numTexGen.hex; + *ptr++ = xfregs.numChan.hex; + *ptr++ = xfregs.dualTexTrans.hex; + + for (int i = 0; i < 2; ++i) { + *ptr++ = xfregs.color[i].hex; + *ptr++ = xfregs.alpha[i].hex; + } + *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; + for (unsigned int i = 0; i < 8; ++i) { + *ptr++ = xfregs.texMtxInfo[i].hex; + *ptr++ = xfregs.postMtxInfo[i].hex; + } + _assert_((ptr - uid->values) == uid->GetNumValues()); +} + + +void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components) +{ + if (!g_ActiveConfig.bEnableShaderDebugging) + return; + + VERTEXSHADERUIDSAFE new_id; + GetSafeVertexShaderId(&new_id, components); + + if (!(old_id == new_id)) + { + std::string new_code(GenerateVertexShaderCode(components, api)); + if (old_code != new_code) + { + _assert_(old_id.GetNumValues() == new_id.GetNumValues()); + + char msg[8192]; + char* ptr = msg; + ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); + const int N = new_id.GetNumValues(); + for (int i = 0; i < N/2; ++i) + ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], + new_id.values[2*i], new_id.values[2*i+1]); + if (N % 2) + ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); + + static int num_failures = 0; + char szTemp[MAX_PATH]; + sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); + std::ofstream file(szTemp); + file << msg; + file << "\n\nOld shader code:\n" << old_code; + file << "\n\nNew shader code:\n" << new_code; + file.close(); + + PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); + } + } +} + + +static char text[16384]; + +#define WRITE p+=sprintf + +char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type) +{ + WRITE(p, "struct VS_OUTPUT {\n"); + WRITE(p, " float4 pos : POSITION;\n"); + WRITE(p, " float4 colors_0 : COLOR0;\n"); + WRITE(p, " float4 colors_1 : COLOR1;\n"); + + if (xfregs.numTexGen.numTexGens < 7) { + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + WRITE(p, " float3 tex%d : TEXCOORD%d;\n", i, i); + WRITE(p, " float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens); + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + WRITE(p, " float4 Normal : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens + 1); + } else { + // clip position is in w of first 4 texcoords + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + { + for (int i = 0; i < 8; ++i) + WRITE(p, " float4 tex%d : TEXCOORD%d;\n", i, i); + } + else + { + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + WRITE(p, " float%d tex%d : TEXCOORD%d;\n", i < 4 ? 4 : 3 , i, i); + } + } + WRITE(p, "};\n"); + + return p; +} + +const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type) +{ + setlocale(LC_NUMERIC, "C"); // Reset locale for compilation + text[sizeof(text) - 1] = 0x7C; // canary + + _assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens); + _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); + + bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); + u32 lightMask = 0; + if (xfregs.numChan.numColorChans > 0) + lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask(); + if (xfregs.numChan.numColorChans > 1) + lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); + + char *p = text; + WRITE(p, "//Vertex Shader: comp:%x, \n", components); + WRITE(p, "typedef struct { float4 T0, T1, T2; float4 N0, N1, N2; } s_"I_POSNORMALMATRIX";\n" + "typedef struct { float4 t; } FLT4;\n" + "typedef struct { FLT4 T[24]; } s_"I_TEXMATRICES";\n" + "typedef struct { FLT4 T[64]; } s_"I_TRANSFORMMATRICES";\n" + "typedef struct { FLT4 T[32]; } s_"I_NORMALMATRICES";\n" + "typedef struct { FLT4 T[64]; } s_"I_POSTTRANSFORMMATRICES";\n" + "typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n" + "typedef struct { Light lights[8]; } s_"I_LIGHTS";\n" + "typedef struct { float4 C0, C1, C2, C3; } s_"I_MATERIALS";\n" + "typedef struct { float4 T0, T1, T2, T3; } s_"I_PROJECTION";\n" + ); + + p = GenerateVSOutputStruct(p, components, api_type); + + // uniforms + + WRITE(p, "uniform s_"I_TRANSFORMMATRICES" "I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES); + WRITE(p, "uniform s_"I_TEXMATRICES" "I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); // also using tex matrices + WRITE(p, "uniform s_"I_NORMALMATRICES" "I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES); + WRITE(p, "uniform s_"I_POSNORMALMATRIX" "I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX); + WRITE(p, "uniform s_"I_POSTTRANSFORMMATRICES" "I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES); + WRITE(p, "uniform s_"I_LIGHTS" "I_LIGHTS" : register(c%d);\n", C_LIGHTS); + WRITE(p, "uniform s_"I_MATERIALS" "I_MATERIALS" : register(c%d);\n", C_MATERIALS); + WRITE(p, "uniform s_"I_PROJECTION" "I_PROJECTION" : register(c%d);\n", C_PROJECTION); + WRITE(p, "uniform float4 "I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS); + + WRITE(p, "VS_OUTPUT main(\n"); + + // inputs + if (components & VB_HAS_NRM0) + WRITE(p, " float3 rawnorm0 : NORMAL0,\n"); + if (components & VB_HAS_NRM1) { + if (is_d3d) + WRITE(p, " float3 rawnorm1 : NORMAL1,\n"); + else + WRITE(p, " float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); + } + if (components & VB_HAS_NRM2) { + if (is_d3d) + WRITE(p, " float3 rawnorm2 : NORMAL2,\n"); + else + WRITE(p, " float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); + } + if (components & VB_HAS_COL0) + WRITE(p, " float4 color0 : COLOR0,\n"); + if (components & VB_HAS_COL1) + WRITE(p, " float4 color1 : COLOR1,\n"); + for (int i = 0; i < 8; ++i) { + u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<= 32 ? (posmtx-32) : posmtx;\n"); + WRITE(p, "float3 N0 = "I_NORMALMATRICES".T[normidx].t.xyz, N1 = "I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = "I_NORMALMATRICES".T[normidx+2].t.xyz;\n"); + } + + if (components & VB_HAS_NRM0) + WRITE(p, "float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); + if (components & VB_HAS_NRM1) + WRITE(p, "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); + if (components & VB_HAS_NRM2) + WRITE(p, "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); + } + else + { + WRITE(p, "float4 pos = float4(dot("I_POSNORMALMATRIX".T0, rawpos), dot("I_POSNORMALMATRIX".T1, rawpos), dot("I_POSNORMALMATRIX".T2, rawpos), 1.0f);\n"); + if (components & VB_HAS_NRM0) + WRITE(p, "float3 _norm0 = normalize(float3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm0)));\n"); + if (components & VB_HAS_NRM1) + WRITE(p, "float3 _norm1 = float3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n"); + if (components & VB_HAS_NRM2) + WRITE(p, "float3 _norm2 = float3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n"); + } + + if (!(components & VB_HAS_NRM0)) + WRITE(p, "float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); + + + + WRITE(p, "o.pos = float4(dot("I_PROJECTION".T0, pos), dot("I_PROJECTION".T1, pos), dot("I_PROJECTION".T2, pos), dot("I_PROJECTION".T3, pos));\n"); + + WRITE(p, "float4 mat, lacc;\n" + "float3 ldir, h;\n" + "float dist, dist2, attn;\n"); + + if(xfregs.numChan.numColorChans == 0) + { + if (components & VB_HAS_COL0) + WRITE(p, "o.colors_0 = color0;\n"); + else + WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + } + + // TODO: This probably isn't necessary if pixel lighting is enabled. + p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + + if(xfregs.numChan.numColorChans < 2) + { + if (components & VB_HAS_COL1) + WRITE(p, "o.colors_1 = color1;\n"); + else + WRITE(p, "o.colors_1 = o.colors_0;\n"); + } + // special case if only pos and tex coord 0 and tex coord input is AB11 + // donko - this has caused problems in some games. removed for now. + bool texGenSpecialCase = false; + /*bool texGenSpecialCase = + ((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0 + (g_VtxDesc.Tex0Coord != NOT_PRESENT) && + (xfregs.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); + */ + + // transform texcoords + WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { + TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; + + WRITE(p, "{\n"); + WRITE(p, "coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + switch (texinfo.sourcerow) { + case XF_SRCGEOM_INROW: + _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); + WRITE(p, "coord = rawpos;\n"); // pos.w is 1 + break; + case XF_SRCNORMAL_INROW: + if (components & VB_HAS_NRM0) { + _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); + WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n"); + } + break; + case XF_SRCCOLORS_INROW: + _assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 ); + break; + case XF_SRCBINORMAL_T_INROW: + if (components & VB_HAS_NRM1) { + _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); + WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n"); + } + break; + case XF_SRCBINORMAL_B_INROW: + if (components & VB_HAS_NRM2) { + _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); + WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n"); + } + break; + default: + _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); + if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) + WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); + break; + } + + // first transformation + switch (texinfo.texgentype) { + case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map + + if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { + // transform the light dir into tangent space + WRITE(p, "ldir = normalize("I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); + WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); + } + else + { + _assert_(0); // should have normals + WRITE(p, "o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); + } + + break; + case XF_TEXGEN_COLOR_STRGBC0: + _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); + WRITE(p, "o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); + break; + case XF_TEXGEN_COLOR_STRGBC1: + _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); + WRITE(p, "o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); + break; + case XF_TEXGEN_REGULAR: + default: + if (components & (VB_HAS_TEXMTXIDX0<