Move the Cg shader generators out to VideoCommon (in the hope of reusing them in the DX plugin in the future). Also move out the stats code. Add a lot of comments and clean up. Remove DX9 Globals.cpp.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@938 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard
2008-10-22 20:54:40 +00:00
parent 32820d2036
commit 39df6c5624
41 changed files with 416 additions and 416 deletions

View File

@ -60,6 +60,4 @@ void OpenGL_SetWindowText(const char *text);
void OpenGL_Shutdown();
void OpenGL_Update();
#endif

View File

@ -31,13 +31,6 @@
Config g_Config;
Statistics stats;
// Clears all per-frame counters by zero-filling the thisFrame member.
// The members declared directly in Statistics are left untouched.
void Statistics::ResetFrame()
{
    memset(&thisFrame, 0, sizeof(thisFrame));
}
Config::Config()
{
memset(this, 0, sizeof(Config));

View File

@ -140,62 +140,6 @@ struct Config
extern Config g_Config;
// Plain counter block for renderer statistics. A single global instance,
// `stats`, is declared right after this struct.
struct Statistics
{
// Counters declared directly in Statistics. ResetFrame() does not touch
// these; it only clears thisFrame.
int numPrimitives;
int numPixelShadersCreated;
int numPixelShadersAlive;
int numVertexShadersCreated;
int numVertexShadersAlive;
int numTexturesCreated;
int numTexturesAlive;
int numRenderTargetsCreated;
int numRenderTargetsAlive;
int numDListsCalled;
int numDListsCreated;
int numDListsAlive;
int numJoins;
// Counters that ResetFrame() zero-fills in one memset.
struct ThisFrame
{
int numBPLoads;
int numCPLoads;
int numXFLoads;
// The *InDL / numDLPrims members mirror the three loads above and numPrims;
// ExecuteDisplayList temporarily swaps each pair while a display list runs.
int numBPLoadsInDL;
int numCPLoadsInDL;
int numXFLoadsInDL;
int numDLs;
int numDLPrims;
int numPrims;
int numShaderChanges;
int numDListsCalled;
};
ThisFrame thisFrame;
// Zero-fills thisFrame.
void ResetFrame();
};
// The one global statistics instance.
extern Statistics stats;
// Statistics gathering switch. STATISTICS is defined unconditionally on the
// next line, so the #else branch below is never selected as written.
#define STATISTICS
#ifdef STATISTICS
// Counter helpers. NOTE: every expansion already ends in ';', so writing
// `INCSTAT(x);` produces an extra empty statement. That is harmless in a
// plain statement list but breaks an unbraced `if (...) INCSTAT(x); else ...`.
#define INCSTAT(a) (a)++;
#define ADDSTAT(a,b) (a)+=(b);
// SETSTAT truncates the value to int before storing it.
#define SETSTAT(a,x) (a)=(int)(x);
#else
// Disabled variants: each expands to an empty statement and does not
// evaluate its arguments.
#define INCSTAT(a) ;
#define ADDSTAT(a,b) ;
#define SETSTAT(a,x) ;
#endif
void DebugLog(const char* _fmt, ...);
void __Log(const char *format, ...);
void __Log(int type, const char *format, ...);

View File

@ -30,6 +30,7 @@
#include "VertexLoader.h"
#include "VertexManager.h"
#include "VertexShaderManager.h"
#include "Statistics.h"
#include "BPStructs.h"
#include "Fifo.h"
@ -44,14 +45,6 @@ extern u8* FAKE_GetFifoEndPtr();
void Decode();
// Exchanges the contents of two objects of the same type by way of a
// temporary copy. T must be copy-constructible and copy-assignable.
template <class T>
void Xchg(T& a, T& b)
{
    T saved = a;   // remember the first value before it is overwritten
    a = b;
    b = saved;
}
void ExecuteDisplayList(u32 address, u32 size)
{
u8* old_pVideoData = g_pVideoData;
@ -59,13 +52,10 @@ void ExecuteDisplayList(u32 address, u32 size)
u8* startAddress = Memory_GetPtr(address);
g_pVideoData = startAddress;
// temporarily swap dl and non-dl(small "hack" for the stats)
Xchg(stats.thisFrame.numDLPrims, stats.thisFrame.numPrims);
Xchg(stats.thisFrame.numXFLoadsInDL, stats.thisFrame.numXFLoads);
Xchg(stats.thisFrame.numCPLoadsInDL, stats.thisFrame.numCPLoads);
Xchg(stats.thisFrame.numBPLoadsInDL, stats.thisFrame.numBPLoads);
while((u32)(g_pVideoData - startAddress) < size)
// temporarily swap dl and non-dl (small "hack" for the stats)
Statistics::SwapDL();
while((u32)(g_pVideoData - startAddress) < size)
{
Decode();
}
@ -73,10 +63,7 @@ void ExecuteDisplayList(u32 address, u32 size)
INCSTAT(stats.thisFrame.numDListsCalled);
// un-swap
Xchg(stats.thisFrame.numDLPrims, stats.thisFrame.numPrims);
Xchg(stats.thisFrame.numXFLoadsInDL, stats.thisFrame.numXFLoads);
Xchg(stats.thisFrame.numCPLoadsInDL, stats.thisFrame.numCPLoads);
Xchg(stats.thisFrame.numBPLoadsInDL, stats.thisFrame.numBPLoads);
Statistics::SwapDL();
// reset to the old pointer
g_pVideoData = old_pVideoData;

View File

@ -1,846 +0,0 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Globals.h"
#include <stdio.h>
#include <cmath>
#include <assert.h>
#include "Profiler.h"
#include "PixelShader.h"
#include "XFMemory.h" // for texture projection mode
#include "BPMemory.h"
// old tev->pixelshader notes
//
// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0
// konstant for this stage (alpha, color) is given by bpmem.tevksel
// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current chan color
// according to GXTevColorArg table above
// output is given by .outreg
// tevtemp is set according to swapmodetables and
// Forward declarations of the shader-text emitters defined further down.
// Each one receives the output cursor `p` by reference and advances it as
// text is appended with the WRITE macro.
static void WriteStage(char *&p, int n, u32 texture_mask);
static void WrapNonPow2Tex(char* &p, const char* var, int texmap, u32 texture_mask);
static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p);
// 1/255. Printed into the generated shader as the tolerance used by the
// "equal" TEV compare modes and (scaled) by the alpha-test comparisons.
const float epsilon8bit = 1.0f / 255.0f;
// Shader-source text for the RGB part of a stage's konstant colour.
// WriteStage indexes this table with bpmem.tevksel[n/2].getKC(n&1) and pastes
// the entry as the first arguments of a float4(...) constructor, which is why
// the constant entries are three comma-separated components. Entries
// 0x08-0x0b are "ERROR" placeholders; the remaining entries are swizzles of
// the I_KCOLORS uniform array.
static const char *tevKSelTableC[] = // KCSEL
{
"1.0f,1.0f,1.0f", //1 = 0x00
"0.875,0.875,0.875",//7_8 = 0x01
"0.75,0.75,0.75", //3_4 = 0x02
"0.625,0.625,0.625",//5_8 = 0x03
"0.5,0.5,0.5", //1_2 = 0x04
"0.375,0.375,0.375",//3_8 = 0x05
"0.25,0.25,0.25", //1_4 = 0x06
"0.125,0.125,0.125",//1_8 = 0x07
"ERROR", //0x08
"ERROR", //0x09
"ERROR", //0x0a
"ERROR", //0x0b
I_KCOLORS"[0].rgb",//K0 = 0x0C
I_KCOLORS"[1].rgb",//K1 = 0x0D
I_KCOLORS"[2].rgb",//K2 = 0x0E
I_KCOLORS"[3].rgb",//K3 = 0x0F
I_KCOLORS"[0].rrr",//K0_R = 0x10
I_KCOLORS"[1].rrr",//K1_R = 0x11
I_KCOLORS"[2].rrr",//K2_R = 0x12
I_KCOLORS"[3].rrr",//K3_R = 0x13
I_KCOLORS"[0].ggg",//K0_G = 0x14
I_KCOLORS"[1].ggg",//K1_G = 0x15
I_KCOLORS"[2].ggg",//K2_G = 0x16
I_KCOLORS"[3].ggg",//K3_G = 0x17
I_KCOLORS"[0].bbb",//K0_B = 0x18
I_KCOLORS"[1].bbb",//K1_B = 0x19
I_KCOLORS"[2].bbb",//K2_B = 0x1A
I_KCOLORS"[3].bbb",//K3_B = 0x1B
I_KCOLORS"[0].aaa",//K0_A = 0x1C
I_KCOLORS"[1].aaa",//K1_A = 0x1D
I_KCOLORS"[2].aaa",//K2_A = 0x1E
I_KCOLORS"[3].aaa",//K3_A = 0x1F
};
// Shader-source text for the alpha part of a stage's konstant colour.
// WriteStage indexes this table with bpmem.tevksel[n/2].getKA(n&1) and pastes
// the entry as the last argument of the same float4(...) constructor that
// receives the tevKSelTableC entry. Entries 0x08-0x0f are "ERROR"
// placeholders; entries from 0x10 on select one component of I_KCOLORS.
static const char *tevKSelTableA[] = // KASEL
{
"1.0f", //1 = 0x00
"0.875f",//7_8 = 0x01
"0.75f", //3_4 = 0x02
"0.625f",//5_8 = 0x03
"0.5f", //1_2 = 0x04
"0.375f",//3_8 = 0x05
"0.25f", //1_4 = 0x06
"0.125f",//1_8 = 0x07
"ERROR", //0x08
"ERROR", //0x09
"ERROR", //0x0a
"ERROR", //0x0b
"ERROR", //0x0c
"ERROR", //0x0d
"ERROR", //0x0e
"ERROR", //0x0f
I_KCOLORS"[0].r",//K0_R = 0x10
I_KCOLORS"[1].r",//K1_R = 0x11
I_KCOLORS"[2].r",//K2_R = 0x12
I_KCOLORS"[3].r",//K3_R = 0x13
I_KCOLORS"[0].g",//K0_G = 0x14
I_KCOLORS"[1].g",//K1_G = 0x15
I_KCOLORS"[2].g",//K2_G = 0x16
I_KCOLORS"[3].g",//K3_G = 0x17
I_KCOLORS"[0].b",//K0_B = 0x18
I_KCOLORS"[1].b",//K1_B = 0x19
I_KCOLORS"[2].b",//K2_B = 0x1A
I_KCOLORS"[3].b",//K3_B = 0x1B
I_KCOLORS"[0].a",//K0_A = 0x1C
I_KCOLORS"[1].a",//K1_A = 0x1D
I_KCOLORS"[2].a",//K2_A = 0x1E
I_KCOLORS"[3].a",//K3_A = 0x1F
};
// Multiplier text for the non-compare combiner; WriteStage indexes it with
// the combiner's `shift` field (cc.shift / ac.shift).
static const char *tevScaleTable[] = // CS
{
"1.0f", //SCALE_1
"2.0f", //SCALE_2
"4.0f", //SCALE_4
"0.5f",//DIVIDE_2
};
// Bias text appended after the lerp term; indexed by the combiner's `bias`
// field. WriteStage treats bias == 3 as "compare mode" and never reads the
// fourth (empty) entry on the normal path.
static const char *tevBiasTable[] = // TB
{
"", //ZERO,
"+0.5f", //ADDHALF,
"-0.5f", //SUBHALF,
"",
};
// Operator text placed between the D input and the lerp term; indexed by the
// combiner's `op` field.
static const char *tevOpTable[] = { // TEV
"+", //TEVOP_ADD = 0,
"-", //TEVOP_SUB = 1,
};
//static const char *tevCompOpTable[] = { ">", "==" };
// Compare-mode operand sizes. NOTE(review): these four names are not
// referenced in the code visible in this file; the compare switch in
// WriteStage uses the TEVCMP_*_GT / TEVCMP_*_EQ names instead, which are
// defined elsewhere.
#define TEVCMP_R8 0
#define TEVCMP_GR16 1
#define TEVCMP_BGR24 2
#define TEVCMP_RGB8 3
// Shader-source text for each colour combiner input. WriteStage indexes this
// with the a/b/c/d fields of the stage's colour combiner. Every real entry
// is a float3 expression; the trailing "PADERROR" entries only pad the table
// so that out-of-range selectors yield recognisable text.
static const char *tevCInputTable[] = // CC
{
"prev.rgb", //CPREV,
"prev.aaa", //APREV,
"c0.rgb", //C0,
"c0.aaa", //A0,
"c1.rgb", //C1,
"c1.aaa", //A1,
"c2.rgb", //C2,
"c2.aaa", //A2,
"textemp.rgb", //TEXC,
"textemp.aaa", //TEXA,
"rastemp.rgb", //RASC,
"rastemp.aaa", //RASA,
"float3(1.0f,1.0f,1.0f)", //ONE,
"float3(.5f,.5f,.5f)", //HALF,
"konsttemp.rgb", //KONST,
"float3(0.0f,0.0f,0.0f)", //ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
};
// Variant of tevCInputTable used by the colour compare modes in WriteStage
// for the A and B operands. The register entries carry no ".rgb" swizzle
// (and the ".aaa" ones are parenthesised) because the caller appends its own
// swizzle such as ".r" or ".rgb" to the entry.
static const char *tevCInputTable2[] = // CC
{
"prev", //CPREV,
"(prev.aaa)", //APREV,
"c0", //C0,
"(c0.aaa)", //A0,
"c1", //C1,
"(c1.aaa)", //A1,
"c2", //C2,
"(c2.aaa)", //A2,
"textemp", //TEXC,
"(textemp.aaa)", //TEXA,
"rastemp", //RASC,
"(rastemp.aaa)", //RASA,
"float3(1.0f,1.0f,1.0f)", //ONE,
"float3(.5f,.5f,.5f)", //HALF,
"konsttemp", //"konsttemp.rgb", //KONST,
"float3(0.0f,0.0f,0.0f)", //ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
};
// Shader-source text for each alpha combiner input (a scalar expression).
// WriteStage indexes this with the a/b/c/d fields of the stage's alpha
// combiner. The "PADERROR" entries are padding only.
static const char *tevAInputTable[] = // CA
{
"prev.a", //APREV,
"c0.a", //A0,
"c1.a", //A1,
"c2.a", //A2,
"textemp.a", //TEXA,
"rastemp.a", //RASA,
"konsttemp.a", //KONST
"0.0", //ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR",
};
// Variant of tevAInputTable used by the alpha compare modes in WriteStage
// for the A and B operands. Entries name the whole float4 register so the
// caller can append the swizzle it needs (".r", ".a" or ".rgb").
static const char *tevAInputTable2[] = // CA
{
"prev", //APREV,
"c0", //A0,
"c1", //A1,
"c2", //A2,
"textemp", //TEXA,
"rastemp", //RASA,
"konsttemp", //KONST, (hw1 had quarter)
"float4(0,0,0,0)", //ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
};
// Shader-source text for the rasterised colour feeding a stage. WriteStage
// indexes this with bpmem.tevorders[n/2].getColorChan(n&1) and appends the
// stage's raster swap swizzle to the chosen entry to form `rastemp`.
static const char *tevRasTable[] =
{
"colors[0]",
"colors[1]",
"ERROR", //2
"ERROR", //3
"ERROR", //4
"alphabump", // use bump alpha
"(alphabump*(255.0f/248.0f))", //normalized
"float4(0,0,0,0)", // zero
};
// Names of the two alpha-test reference values inside the I_ALPHA uniform;
// WriteAlphaCompare indexes this with its `num` argument (0 or 1).
static const char *alphaRef[2] =
{
I_ALPHA"[0].x",
I_ALPHA"[0].y"
};
//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
// Destination registers of the colour / alpha combiner, indexed by the
// combiner's `dest` field.
static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" };
// Indirect-stage helper text used by WriteStage. tevIndAlphaSel is indexed
// by tevind[n].bs and tevIndAlphaScale by tevind[n].fmt when the bump alpha
// is written.
static const char* tevIndAlphaSel[] = {"", "x", "y", "z"};
static const char* tevIndAlphaScale[] = {"", "*32","*16","*8"};
static const char* tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
static const char* tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
// Indexed by tevind[n].sw / tevind[n].tw. Only seven entries are provided.
static const char* tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" };
// Indexed by tevind[n].fmt; scales the .xy of the fetched indirect texel.
static const char* tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "8.0f" };
// WRITE(p, fmt, ...) expands to `p+=sprintf(p, fmt, ...)`: it appends
// formatted text at the cursor and advances the cursor by the number of
// characters written. There is no bounds checking; GeneratePixelShader relies
// on a canary byte at the end of `text` to detect an overrun after the fact.
#define WRITE p+=sprintf
// Channel letters selected by the swap1/swap2 fields in BuildSwapModeTable.
static const char *swapColors = "rgba";
// Four NUL-terminated four-letter swizzles, rebuilt by BuildSwapModeTable.
static char swapModeTable[4][5];
// Output buffer for the generated shader source. GeneratePixelShader returns
// a pointer to it, so each call overwrites the previous result.
static char text[16384];
// Refreshes swapModeTable from the current BP state. Row i becomes a
// NUL-terminated four-letter swizzle: the first two letters come from the
// swap1/swap2 fields of tevksel[2*i], the last two from tevksel[2*i+1].
static void BuildSwapModeTable()
{
    for (int row = 0; row < 4; ++row)
    {
        char *swizzle = swapModeTable[row];
        const int even = row * 2;
        const int odd = even + 1;

        swizzle[0] = swapColors[bpmem.tevksel[even].swap1];
        swizzle[1] = swapColors[bpmem.tevksel[even].swap2];
        swizzle[2] = swapColors[bpmem.tevksel[odd].swap1];
        swizzle[3] = swapColors[bpmem.tevksel[odd].swap2];
        swizzle[4] = 0;
    }
}
// Builds the source text of a pixel shader for the current bpmem state.
//
// texture_mask       - bit i (0..7) set: sampler i is declared as samplerRECT
//                      and sampled with texRECT; clear: sampler2D / tex2D.
//                      Bits 8+i and 16+i are read as per-sampler wrap flags
//                      by WrapNonPow2Tex and WriteStage.
// has_zbuffer_target - together with bpmem.zmode.updateenable decides whether
//                      depth is also written out as a colour (bRenderZ).
// bRenderZToCol0     - when set, the encoded depth goes to ocol0.xyz instead
//                      of a separate ocol1 output. Must imply bRenderZ
//                      (asserted below).
//
// Returns a pointer to the file-static `text` buffer, so the result is only
// valid until the next call. A PanicAlert is raised if the canary byte at
// the end of the buffer was overwritten while generating.
char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0)
{
text[sizeof(text) - 1] = 0x7C; // canary
DVSTARTPROFILE();
BuildSwapModeTable();
int numStages = bpmem.genMode.numtevstages + 1;
int numTexgen = bpmem.genMode.numtexgens;
char *p = text;
WRITE(p, "//Pixel Shader for TEV stages\n");
WRITE(p, "//%i TEV stages, %i texgens, %i IND stages\n",
numStages, numTexgen, bpmem.genMode.numindstages);
// bRenderZ: depth must be emitted as a colour output.
// bOutputZ: the shader writes the DEPTH output (z-texturing enabled).
// bInputZ:  the shader needs the interpolated z, read from uv<ztexcoord>.w.
bool bRenderZ = has_zbuffer_target && bpmem.zmode.updateenable;
bool bOutputZ = bpmem.ztex2.op != ZTEXTURE_DISABLE;
bool bInputZ = bpmem.ztex2.op==ZTEXTURE_ADD || bRenderZ;
// bool bRenderZToCol0 = ; // output z and alpha to color0
assert( !bRenderZToCol0 || bRenderZ );
// z travels in the .w of the last texcoord (or of uv0 when there are no texgens).
int ztexcoord = -1;
if (bInputZ)
ztexcoord = numTexgen == 0 ? 0 : numTexgen-1;
// Bit mask of the indirect stages that some active TEV stage refers to.
int nIndirectStagesUsed = 0;
if (bpmem.genMode.numindstages > 0) {
for (int i = 0; i < numStages; ++i) {
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) {
nIndirectStagesUsed |= 1<<bpmem.tevind[i].bt;
}
}
}
// Declare samplers
if (texture_mask) {
WRITE(p, "uniform samplerRECT ");
bool bfirst = true;
for (int i = 0; i < 8; ++i) {
if (texture_mask & (1<<i)) {
WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",", i, i);
bfirst = false;
}
}
WRITE(p, ";\n");
}
if (texture_mask != 0xff) {
WRITE(p, "uniform sampler2D ");
bool bfirst = true;
for (int i = 0; i < 8; ++i) {
if (!(texture_mask & (1<<i))) {
WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",",i, i);
bfirst = false;
}
}
WRITE(p, ";\n");
}
WRITE(p, "\n");
// Uniform constants; names come from the I_* macros, registers from C_*.
WRITE(p, "uniform float4 "I_COLORS"[4] : register(c%d);\n", C_COLORS);
WRITE(p, "uniform float4 "I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS);
WRITE(p, "uniform float4 "I_ALPHA"[1] : register(c%d);\n", C_ALPHA);
WRITE(p, "uniform float4 "I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS);
WRITE(p, "uniform float4 "I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS);
WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE);
WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX);
// Entry point signature: outputs first, then texcoord inputs, then colours.
WRITE(p, "void main(\n");
WRITE(p, "out half4 ocol0 : COLOR0,\n");
if (bRenderZ && !bRenderZToCol0 )
WRITE(p, "out half4 ocol1 : COLOR1,\n");
if (bOutputZ )
WRITE(p, " out float depth : DEPTH,\n");
// if zcoord might come from vertex shader in texcoord
if (bInputZ) {
if (numTexgen) {
for (int i = 0; i < numTexgen; ++i)
WRITE(p, " in float%d uv%d : TEXCOORD%d, \n", i==ztexcoord?4:3, i,i);
}
else
WRITE(p, " in float4 uv0 : TEXCOORD0,"); //HACK
}
else {
if (numTexgen) {
for (int i = 0; i < numTexgen; ++i)
WRITE(p, " in float3 uv%d : TEXCOORD%d,\n",i,i);
}
else
WRITE(p, " in float3 uv0 : TEXCOORD0,\n"); //HACK
}
WRITE(p, " in float4 colors[2] : COLOR0){\n");
// Remember where the body starts so it can be thrown away if the alpha test
// turns out to always fail.
char* pmainstart = p;
WRITE(p, "float4 c0="I_COLORS"[1],c1="I_COLORS"[2],c2="I_COLORS"[3],prev=float4(0.0f,0.0f,0.0f,0.0f),textemp,rastemp,konsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n"
"float3 comp16 = float3(1,255,0), comp24 = float3(1,255,255*255);\n"
"float4 alphabump=0;\n"
"float3 tevcoord;\n"
"float2 wrappedcoord, tempcoord;\n");
//if (bOutputZ ) WRITE(p, " float depth;\n");
// WRITE(p, "return 1;}\n");
// return PixelShaderMngr::CompilePixelShader(ps, text);
// indirect texture map lookup
for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) {
if (nIndirectStagesUsed & (1<<i)) {
// perform indirect texture map lookup
// note that we have to scale by the regular texture map's coordinates since this is a texRECT call
// (and we have to match with the game's texscale calls)
int texcoord = bpmem.tevindref.getTexCoord(i);
if (texture_mask & (1<<bpmem.tevindref.getTexMap(i))) {
// TODO: I removed a superfluous argument, please check that the resulting expression is correct. (mthuurne 2008-08-27)
WRITE(p, "float2 induv%d=uv%d.xy * "I_INDTEXSCALE"[%d].%s;\n", i, texcoord, i/2, (i&1)?"zw":"xy"); //, bpmem.tevindref.getTexMap(i)
char str[16];
sprintf(str, "induv%d", i);
WrapNonPow2Tex(p, str, bpmem.tevindref.getTexMap(i), texture_mask);
WRITE(p, "float3 indtex%d=texRECT(samp%d,induv%d.xy).abg;\n", i, bpmem.tevindref.getTexMap(i), i);
}
else {
WRITE(p, "float3 indtex%d=tex2D(samp%d,uv%d.xy*"I_INDTEXSCALE"[%d].%s).abg;\n", i, bpmem.tevindref.getTexMap(i), texcoord, i/2, (i&1)?"zw":"xy");
}
}
}
for (int i = 0; i < numStages; i++)
WriteStage(p, i, texture_mask); //build the equation for this stage
if (bOutputZ) {
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
if (bpmem.ztex2.op == ZTEXTURE_ADD) {
WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + uv%d.w);\n", ztexcoord);
}
else {
_assert_(bpmem.ztex2.op == ZTEXTURE_REPLACE);
WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyz, textemp.xyz) + "I_ZBIAS"[0].w);\n");
}
}
//if (bpmem.genMode.numindstages ) WRITE(p, "prev.rg = indtex0.xy;\nprev.b = 0;\n");
if (!WriteAlphaTest(p)) {
// alpha test will always fail, so restart the shader and just make it an empty function
// NOTE(review): rewinding also drops the declarations of prev/depth that
// the bRenderZ code below may still reference - confirm this combination
// cannot occur.
p = pmainstart;
WRITE(p, "discard;\n");
WRITE(p, "ocol0 = 0;\n");
}
else {
if (!bRenderZToCol0) {
/* NEEDS FIX - dstalpha does not change how fragments are blended with the EFB
once the blending is done, the dstalpha is written to the EFB in place of the
fragment alpha if dstalpha is enabled. this only matters if the EFB supports alpha.
if (bpmem.dstalpha.enable)
WRITE(p, " ocol0 = float4(prev.rgb,"I_ALPHA"[0].w);\n");
else*/
WRITE(p, " ocol0 = prev;\n");
}
}
if (bRenderZ) {
// write depth as color
// The depth value is split across three channels via
// frac(float3(256*256, 256, 1) * z).
if (bRenderZToCol0) {
if (bOutputZ )
WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * depth);\n");
else
WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * uv%d.w);\n", ztexcoord);
WRITE(p, "ocol0.w = prev.w;\n");
}
else {
if (bOutputZ)
WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * depth);\n");
else
WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * uv%d.w);\n", ztexcoord);
}
}
WRITE(p, "}\n");
// The canary was planted in the last byte of `text` on entry; if it changed,
// the unchecked sprintf calls ran past the end of the buffer.
if (text[sizeof(text) - 1] != 0x7C)
PanicAlert("PixelShader generator - buffer too small, canary has been eaten!");
return text;
}
static void WriteStage(char *&p, int n, u32 texture_mask)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
int texfun = xfregs.texcoords[texcoord].texmtxinfo.projection;
bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages;
if (bHasIndStage) {
// perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
bHasIndStage = true;
int texmap = bpmem.tevorders[n/2].getEnable(n&1) ? bpmem.tevorders[n/2].getTexMap(n&1) : bpmem.tevindref.getTexMap(bpmem.tevind[n].bt);
if (bpmem.tevind[n].bs != ITBA_OFF) {
// write the bump alpha
if (bpmem.tevind[n].fmt == ITF_8) {
WRITE(p, "alphabump = indtex%d.%s %s;\n", bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]);
}
else {
// donkopunchstania: really bad way to do this
// cannot always use fract because fract(1.0) is 0.0 when it needs to be 1.0
// omitting fract seems to work as well
WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs]);
WRITE(p, " alphabump = 1.0f;\n");
WRITE(p, "else\n");
WRITE(p, " alphabump = fract ( indtex%d.%s %s );\n", bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]);
}
}
// bias
WRITE(p, "float3 indtevcrd%d = indtex%d;\n", n, bpmem.tevind[n].bt);
WRITE(p, "indtevcrd%d.xy *= %s;\n", n, tevIndFmtScale[bpmem.tevind[n].fmt]);
if (bpmem.tevind[n].bias != ITB_NONE )
WRITE(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
// multiply by offset matrix and scale
if (bpmem.tevind[n].mid != 0) {
if (bpmem.tevind[n].mid <= 3) {
int mtxidx = 2*(bpmem.tevind[n].mid-1);
WRITE(p, "float2 indtevtrans%d = float2(dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n",
n, mtxidx, n, mtxidx+1, n);
}
else if (bpmem.tevind[n].mid <= 5) { // s matrix
int mtxidx = 2*(bpmem.tevind[n].mid-5);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
}
else if (bpmem.tevind[n].mid <= 9) { // t matrix
int mtxidx = 2*(bpmem.tevind[n].mid-9);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
}
else {
// TODO: I removed a superfluous argument, please check that the resulting expression is correct. (mthuurne 2008-08-27)
WRITE(p, "float2 indtevtrans%d = 0;\n", n); //, n
}
}
else {
// TODO: I removed a superfluous argument, please check that the resulting expression is correct. (mthuurne 2008-08-27)
WRITE(p, "float2 indtevtrans%d = 0;\n", n); //, n
}
// wrapping
if (!bpmem.tevorders[n/2].getEnable(n&1) || (texture_mask & (1<<texmap))) {
// non pow2
if (bpmem.tevind[n].sw != ITW_OFF || bpmem.tevind[n].tw != ITW_OFF) {
if (bpmem.tevind[n].sw == ITW_0) {
if (bpmem.tevind[n].tw == ITW_0) {
// zero out completely
WRITE(p, "wrappedcoord = float2(0.0f,0.0f);\n");
}
else {
WRITE(p, "wrappedcoord.x = fmod( (uv%d.x+%s)*"I_TEXDIMS"[%d].x*"I_TEXDIMS"[%d].z, %s);\n"
"wrappedcoord.y = 0;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw], texmap, texmap, tevIndWrapStart[bpmem.tevind[n].sw]);
}
}
else if (bpmem.tevind[n].tw == ITW_0) {
WRITE(p, "wrappedcoord.y = fmod( (uv%d.y+%s)*"I_TEXDIMS"[%d].y*"I_TEXDIMS"[%d].w, %s);\n"
"wrappedcoord.x = 0;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw], texmap, texmap, tevIndWrapStart[bpmem.tevind[n].tw]);
}
else {
WRITE(p, "wrappedcoord = fmod( (uv%d.xy+float2(%s,%s))*"I_TEXDIMS"[%d].xy*"I_TEXDIMS"[%d].zw, float2(%s,%s));\n", texcoord,
tevIndWrapStart[bpmem.tevind[n].sw], tevIndWrapStart[bpmem.tevind[n].tw],texmap,texmap,
tevIndWrapStart[bpmem.tevind[n].sw], tevIndWrapStart[bpmem.tevind[n].tw]);
}
}
else {
WRITE(p, "wrappedcoord = uv%d.xy*"I_TEXDIMS"[%d].xy;\n", texcoord, texmap);
}
}
else {
// pow of 2
WRITE(p, "indtevtrans%d.xy *= "I_TEXDIMS"[%d].xy * "I_TEXDIMS"[%d].zw;\n", n, texmap, texmap);
// mult by bitdepth / tex dimensions
if (bpmem.tevind[n].sw != ITW_OFF || bpmem.tevind[n].tw != ITW_OFF) {
if (bpmem.tevind[n].sw == ITW_0) {
if (bpmem.tevind[n].tw == ITW_0) {
// zero out completely
WRITE(p, "wrappedcoord = float2(0.0f,0.0f);\n");
}
else {
WRITE(p, "wrappedcoord.x = "I_TEXDIMS"[%d].x * fmod( uv%d.x+%s, "I_TEXDIMS"[%d].z*%s);\n"
"wrappedcoord.y = 0;\n", texmap, texcoord, tevIndWrapStart[bpmem.tevind[n].sw], texmap, tevIndWrapStart[bpmem.tevind[n].sw]);
}
}
else if (bpmem.tevind[n].tw == ITW_0) {
WRITE(p, "wrappedcoord.y = "I_TEXDIMS"[%d].y * fmod( uv%d.y+%s, "I_TEXDIMS"[%d].w*%s);\n"
"wrappedcoord.x = 0;\n", texmap, texcoord, tevIndWrapStart[bpmem.tevind[n].tw], texmap, tevIndWrapStart[bpmem.tevind[n].tw]);
}
else {
// have to add an offset or else might get negative values!
WRITE(p, "wrappedcoord = "I_TEXDIMS"[%d].xy * fmod( uv%d.xy+float2(%s,%s), "I_TEXDIMS"[%d].zw*float2(%s,%s));\n", texmap, texcoord,
tevIndWrapStart[bpmem.tevind[n].sw], tevIndWrapStart[bpmem.tevind[n].tw], texmap,
tevIndWrapStart[bpmem.tevind[n].sw], tevIndWrapStart[bpmem.tevind[n].tw]);
}
}
else {
WRITE(p, "wrappedcoord = uv%d.xy;\n", texcoord);
}
}
if (bpmem.tevind[n].fb_addprev) {
// add previous tevcoord
if (texfun == XF_TEXPROJ_STQ) {
WRITE(p, "tevcoord.xy += wrappedcoord/uv%d.z + indtevtrans%d;\n", texcoord, n);
//WRITE(p, "tevcoord.z += uv%d.z;\n", texcoord);
}
else {
WRITE(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n);
}
}
else {
WRITE(p, "tevcoord.xy = wrappedcoord/uv%d.z + indtevtrans%d;\n", texcoord, n);
//if (texfun == XF_TEXPROJ_STQ )
// WRITE(p, "tevcoord.z = uv%d.z;\n", texcoord);
}
}
WRITE(p, "rastemp=%s.%s;\n",tevRasTable[bpmem.tevorders[n/2].getColorChan(n&1)],rasswap);
if (bpmem.tevorders[n/2].getEnable(n&1)) {
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
if(!bHasIndStage) {
// calc tevcord
//tevcoord.xy = texdim[1].xy * uv1.xy / uv1.z;
int OurTexCoord = 0;
if(bpmem.genMode.numtexgens)
OurTexCoord = texcoord;
else
OurTexCoord = 0;
if (texture_mask & (1<<texmap)) {
// nonpow2
if (texfun == XF_TEXPROJ_STQ )
WRITE(p, "tevcoord.xy = uv%d.xy / uv%d.z;\n", texcoord, OurTexCoord);
else
WRITE(p, "tevcoord.xy = uv%d.xy;\n", OurTexCoord);
WrapNonPow2Tex(p, "tevcoord", texmap, texture_mask);
}
else {
if (texfun == XF_TEXPROJ_STQ )
WRITE(p, "tevcoord.xy = "I_TEXDIMS"[%d].xy * uv%d.xy / uv%d.z;\n", texmap, OurTexCoord , OurTexCoord );
else
WRITE(p, "tevcoord.xy = "I_TEXDIMS"[%d].xy * uv%d.xy;\n", texmap, OurTexCoord);
}
}
else if (texture_mask & (1<<texmap)) {
// if non pow 2, have to manually repeat
//WrapNonPow2Tex(p, "tevcoord", texmap);
bool bwraps = !!(texture_mask & (0x100<<texmap));
bool bwrapt = !!(texture_mask & (0x10000<<texmap));
if (bwraps || bwrapt) {
const char* field = bwraps ? (bwrapt ? "xy" : "x") : "y";
WRITE(p, "tevcoord.%s = fmod(tevcoord.%s+32*"I_TEXDIMS"[%d].%s,"I_TEXDIMS"[%d].%s);\n", field, field, texmap, field, texmap, field);
}
}
if (texture_mask & (1<<texmap) )
WRITE(p, "textemp=texRECT(samp%d,tevcoord.xy).%s;\n", texmap, texswap);
else
WRITE(p, "textemp=tex2D(samp%d,tevcoord.xy).%s;\n", texmap, texswap);
}
else
WRITE(p, "textemp=float4(1,1,1,1);\n");
int kc = bpmem.tevksel[n/2].getKC(n&1);
int ka = bpmem.tevksel[n/2].getKA(n&1);
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;
bool bCKonst = cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST;
bool bAKonst = ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST;
if (bCKonst || bAKonst )
WRITE(p, "konsttemp=float4(%s,%s);\n",tevKSelTableC[kc],tevKSelTableA[ka]);
WRITE(p, "%s= ", tevCOutputTable[cc.dest]);
// combine the color channel
if (cc.bias != 3) { // if not compare
//normal color combiner goes here
WRITE(p, " %s*(%s%s",tevScaleTable[cc.shift],tevCInputTable[cc.d],tevOpTable[cc.op]);
WRITE(p, "lerp(%s,%s,%s) %s);\n",
tevCInputTable[cc.a], tevCInputTable[cc.b],
tevCInputTable[cc.c], tevBiasTable[cc.bias]);
}
else {
int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
switch(cmp) {
case TEVCMP_R8_GT:
case TEVCMP_RGB8_GT: // per component compares
WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : float3(0.0f,0.0f,0.0f));\n",
tevCInputTable[cc.d], tevCInputTable2[cc.a], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable2[cc.b], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable[cc.c]);
break;
case TEVCMP_R8_EQ:
case TEVCMP_RGB8_EQ:
WRITE(p, " %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f));\n",
tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], epsilon8bit, tevCInputTable[cc.c]);
break;
case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f));\n",
tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_GT?"16":"24", tevCInputTable[cc.c]);
break;
case TEVCMP_GR16_EQ:
case TEVCMP_BGR24_EQ:
WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f));\n",
tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_GT?"16":"24", epsilon8bit, tevCInputTable[cc.c]);
break;
default:
WRITE(p, "float3(0.0f,0.0f,0.0f);\n");
break;
}
}
if (cc.clamp)
WRITE(p, "%s = clamp(%s,0.0f,1.0f);\n", tevCOutputTable[cc.dest],tevCOutputTable[cc.dest]);
// combine the alpha channel
WRITE(p, "%s= ", tevAOutputTable[ac.dest]);
if (ac.bias != 3) { // if not compare
//normal alpha combiner goes here
WRITE(p, " %s*(%s%s",tevScaleTable[ac.shift],tevAInputTable[ac.d],tevOpTable[ac.op]);
WRITE(p, "lerp(%s,%s,%s) %s)\n",
tevAInputTable[ac.a],tevAInputTable[ac.b],
tevAInputTable[ac.c],tevBiasTable[ac.bias]);
}
else {
//compare alpha combiner goes here
int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
switch(cmp) {
case TEVCMP_R8_GT:
case TEVCMP_A8_GT:
WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : 0)\n",
tevAInputTable[ac.d],tevAInputTable2[ac.a], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable2[ac.b], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable[ac.c]);
break;
case TEVCMP_R8_EQ:
case TEVCMP_A8_EQ:
WRITE(p, " %s + (abs(%s.r - %s.r)<%f ? %s : 0)\n",
tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],epsilon8bit,tevAInputTable[ac.c]);
break;
case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)\n",
tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]);
break;
case TEVCMP_GR16_EQ:
case TEVCMP_BGR24_EQ:
WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)\n",
tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_GT?"16":"24",epsilon8bit,tevAInputTable[ac.c]);
break;
default:
WRITE(p, "0)\n");
break;
}
}
WRITE(p, ";\n");
if (ac.clamp)
WRITE(p, "%s = clamp(%s,0.0f,1.0f);\n", tevAOutputTable[ac.dest],tevAOutputTable[ac.dest]);
WRITE(p, "\n");
}
void WrapNonPow2Tex(char* &p, const char* var, int texmap, u32 texture_mask)
{
_assert_(texture_mask & (1<<texmap));
bool bwraps = !!(texture_mask & (0x100<<texmap));
bool bwrapt = !!(texture_mask & (0x10000<<texmap));
if (bwraps || bwrapt) {
const char* field = bwraps ? (bwrapt ? "xy" : "x") : "y";
const char* wrapfield = bwraps ? (bwrapt ? "zw" : "z") : "w";
WRITE(p, "%s.%s = "I_TEXDIMS"[%d].%s*frac(%s.%s*"I_TEXDIMS"[%d].%s+32);\n", var, field, texmap, field, var, field, texmap, wrapfield);
if (!bwraps )
WRITE(p, "%s.x *= "I_TEXDIMS"[%d].x * "I_TEXDIMS"[%d].z;\n", var, texmap, texmap);
if (!bwrapt )
WRITE(p, "%s.y *= "I_TEXDIMS"[%d].y * "I_TEXDIMS"[%d].w;\n", var, texmap, texmap);
}
else {
WRITE(p, "%s.xy *= "I_TEXDIMS"[%d].xy * "I_TEXDIMS"[%d].zw;\n", var, texmap, texmap);
}
}
// Emits the parenthesised condition under which alpha comparison `num`
// (0 or 1) FAILS. The caller places it inside discard(...), so each case is
// the logical inverse of the comparison named by `comp`; e.g. ALWAYS emits
// "(false)". Nothing is emitted for a `comp` value outside the listed cases.
static void WriteAlphaCompare(char *&p, int num, int comp)
{
    const char *ref = alphaRef[num];

    switch (comp)
    {
    case ALPHACMP_ALWAYS:
        WRITE(p, "(false)");
        break;
    case ALPHACMP_NEVER:
        WRITE(p, "(true)");
        break;
    case ALPHACMP_LEQUAL:
        WRITE(p, "(prev.a > %s)", ref);
        break;
    case ALPHACMP_LESS:
        WRITE(p, "(prev.a >= %s - %f)", ref, epsilon8bit*0.5f);
        break;
    case ALPHACMP_GEQUAL:
        WRITE(p, "(prev.a < %s)", ref);
        break;
    case ALPHACMP_GREATER:
        WRITE(p, "(prev.a <= %s + %f)", ref, epsilon8bit*0.5f);
        break;
    case ALPHACMP_EQUAL:
        WRITE(p, "(abs(prev.a-%s)>%f)", ref, epsilon8bit*2);
        break;
    case ALPHACMP_NEQUAL:
        WRITE(p, "(abs(prev.a-%s)<%f)", ref, epsilon8bit*2);
        break;
    }
}
static bool WriteAlphaTest(char *&p)
{
u32 op = bpmem.alphaFunc.logic;
u32 comp[2] = {bpmem.alphaFunc.comp0,bpmem.alphaFunc.comp1};
//first kill all the simple cases
switch(op) {
case 0: // and
if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) {
WRITE(p, "discard;\n");
return false;
}
break;
case 1: // or
if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) {
WRITE(p, "discard;\n");
return false;
}
break;
case 2: // xor
if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS) ) return true;
if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)) {
WRITE(p, "discard;\n");
return false;
}
break;
case 3: // xnor
if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS)) {
WRITE(p, "discard;\n");
return false;
}
if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) )
return true;
break;
}
WRITE(p, "discard( ");
WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0);
// negated because testing the inverse condition
switch(bpmem.alphaFunc.logic) {
case 0: WRITE(p, " || "); break; // and
case 1: WRITE(p, " && "); break; // or
case 2: WRITE(p, " == "); break; // xor
case 3: WRITE(p, " != "); break; // xnor
}
WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1);
WRITE(p, ");\n");
return true;
}

View File

@ -1,44 +0,0 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef GCOGL_PIXELSHADER
#define GCOGL_PIXELSHADER
#include "Common.h"
// Names of the uniform arrays as they appear in the generated shader source.
// They are string literals so the generator can splice them into format
// strings by adjacent-literal concatenation.
#define I_COLORS "color"
#define I_KCOLORS "k"
#define I_ALPHA "alphaRef"
#define I_TEXDIMS "texdim"
#define I_ZBIAS "czbias"
#define I_INDTEXSCALE "cindscale"
#define I_INDTEXMTX "cindmtx"
// Constant-register index of each uniform array, printed into the shader as
// `register(c%d)`. Each index is the previous one plus the previous array's
// length (4, 4, 1, 8, 2, 2, 6 float4 entries respectively).
#define C_COLORS 0
#define C_KCOLORS (C_COLORS+4)
#define C_ALPHA (C_KCOLORS+4)
#define C_TEXDIMS (C_ALPHA+1)
#define C_ZBIAS (C_TEXDIMS+8)
#define C_INDTEXSCALE (C_ZBIAS+2)
#define C_INDTEXMTX (C_INDTEXSCALE+2)
// First register past the arrays above. C_COLORMATRIX has the same value.
#define C_ENVCONST_END (C_INDTEXMTX+6)
#define C_COLORMATRIX (C_INDTEXMTX+6)
// Generates pixel shader source for the current TEV state and returns a
// pointer to the generated text.
char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0);
#endif

View File

@ -23,6 +23,7 @@
#include <cmath>
#include "Statistics.h"
#include "ImageWrite.h"
#include "Common.h"
#include "Render.h"
@ -32,7 +33,7 @@
PixelShaderMngr::PSCache PixelShaderMngr::pshaders;
FRAGMENTSHADER* PixelShaderMngr::pShaderLast = NULL;
PixelShaderMngr::PIXELSHADERUID PixelShaderMngr::s_curuid;
PIXELSHADERUID PixelShaderMngr::s_curuid;
static int s_nMaxPixelInstructions;
static int s_nColorsChanged[2]; // 0 - regular colors, 1 - k colors
@ -517,7 +518,7 @@ GLuint PixelShaderMngr::GetColorMatrixProgram()
// Mash together all the inputs that contribute to the code of a generated pixel shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void PixelShaderMngr::GetPixelShaderId(PixelShaderMngr::PIXELSHADERUID& uid)
void PixelShaderMngr::GetPixelShaderId(PIXELSHADERUID &uid)
{
u32 projtexcoords = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; i++) {

View File

@ -18,12 +18,10 @@
#ifndef _PIXELSHADERMANAGER_H
#define _PIXELSHADERMANAGER_H
#include "PixelShader.h"
#include <map>
#include "BPMemory.h"
#include <map>
struct FRAGMENTSHADER
{
FRAGMENTSHADER() : glprogid(0) { }
@ -33,65 +31,59 @@ struct FRAGMENTSHADER
#endif
};
// Key type for the pixel shader cache. Holds the packed state words that
// determine the generated shader, plus the stage counts that say how many of
// those words are actually in use (see GetNumValues()).
//
// Only the first GetNumValues() words take part in comparisons. Words past
// that point are always kept at zero so that comparing against a uid with
// fewer active words never reads indeterminate memory.
class PIXELSHADERUID
{
public:
	// Capacity of the values array, in 32-bit words.
	enum { NUM_VALUES = 4+32+6+11 };

	u32 values[NUM_VALUES];
	u16 tevstages, indstages;

	// Creates an empty uid: all words zero, no stages.
	PIXELSHADERUID() {
		memset(values, 0, sizeof(values));
		tevstages = indstages = 0;
	}

	// Copies the active words of r and zero-fills the rest.
	// (The previous version left the tail uninitialized, which operator< and
	// operator== could then read through _Right when *this had more active
	// words than _Right.)
	PIXELSHADERUID(const PIXELSHADERUID& r)
	{
		tevstages = r.tevstages;
		indstages = r.indstages;
		const int N = GetNumValues();
		_assert_(N <= NUM_VALUES);
		for (int i = 0; i < N; ++i)
			values[i] = r.values[i];
		for (int i = N; i < NUM_VALUES; ++i)
			values[i] = 0;
	}

	// Number of words of `values` that are significant for this uid.
	int GetNumValues() const {
		return tevstages + indstages + 3; // numTevStages*3/2+1
	}

	// Lexicographic "less than" over the first GetNumValues() words of *this.
	// Note that the word count of _Right is not consulted.
	bool operator <(const PIXELSHADERUID& _Right) const
	{
		// Word 0 is compared unconditionally, even if GetNumValues() were 0.
		if (values[0] < _Right.values[0])
			return true;
		else if (values[0] > _Right.values[0])
			return false;
		const int N = GetNumValues();
		for (int i = 1; i < N; ++i) {
			if (values[i] < _Right.values[i])
				return true;
			else if (values[i] > _Right.values[i])
				return false;
		}
		return false;
	}

	// True when the first GetNumValues() words of *this match those of _Right.
	bool operator ==(const PIXELSHADERUID& _Right) const
	{
		if (values[0] != _Right.values[0])
			return false;
		const int N = GetNumValues();
		for (int i = 1; i < N; ++i) {
			if (values[i] != _Right.values[i])
				return false;
		}
		return true;
	}
};
class PixelShaderMngr
{
class PIXELSHADERUID
{
public:
PIXELSHADERUID() {
values = new u32[4+32+6+11];
memset(values, 0, (4+32+6+11) * 4);
tevstages = indstages = 0;
}
~PIXELSHADERUID() { delete[] values; values = NULL;}
PIXELSHADERUID(const PIXELSHADERUID& r)
{
values = new u32[4+32+6+11];
tevstages = r.tevstages; indstages = r.indstages;
int N = tevstages + indstages + 3;
_assert_(N <= 4+32+6+11);
for(int i = 0; i < N; ++i)
values[i] = r.values[i];
}
bool operator <(const PIXELSHADERUID& _Right) const
{
if( values[0] < _Right.values[0] )
return true;
else if( values[0] > _Right.values[0] )
return false;
int N = tevstages + indstages + 3; // numTevStages*3/2+1
int i = 1;
for(; i < N; ++i) {
if( values[i] < _Right.values[i] )
return true;
else if( values[i] > _Right.values[i] )
return false;
}
return false;
}
bool operator ==(const PIXELSHADERUID& _Right) const
{
if( values[0] != _Right.values[0] )
return false;
int N = tevstages + indstages + 3; // numTevStages*3/2+1
int i = 1;
for(; i < N; ++i) {
if( values[i] != _Right.values[i] )
return false;
}
return true;
}
u32* values;
u16 tevstages, indstages;
};
struct PSCacheEntry
{
FRAGMENTSHADER shader;
@ -105,7 +97,7 @@ class PixelShaderMngr
}
};
typedef std::map<PIXELSHADERUID,PSCacheEntry> PSCache;
typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache;
static FRAGMENTSHADER* pShaderLast; // last used shader
static PSCache pshaders;

View File

@ -29,6 +29,7 @@
#include "GLInit.h"
#include "Profiler.h"
#include "Statistics.h"
#include "ImageWrite.h"
#include "Render.h"
#include "OpcodeDecoding.h"

View File

@ -13,7 +13,6 @@ files = [
'main.cpp',
'memcpy_amd.cpp',
'OpcodeDecoding.cpp',
'PixelShader.cpp',
'PixelShaderManager.cpp',
'rasterfont.cpp',
'Render.cpp',
@ -22,7 +21,6 @@ files = [
'VertexManager.cpp',
'VertexLoader.cpp',
'VertexLoader_Normal.cpp',
'VertexShader.cpp',
'VertexShaderManager.cpp',
'XFB.cpp',
'GUI/ConfigDlg.cpp',

View File

@ -29,6 +29,7 @@
#undef _interlockedbittestandreset64
#endif
#include "Statistics.h"
#include "Profiler.h"
#include "ImageWrite.h"
@ -51,8 +52,15 @@ static u32 s_TempFramebuffer = 0;
#define TEMP_SIZE (1024*1024*4)
const GLint c_MinLinearFilter[8] = {
GL_NEAREST, GL_NEAREST_MIPMAP_NEAREST, GL_NEAREST_MIPMAP_LINEAR, GL_NEAREST,
GL_LINEAR, GL_LINEAR_MIPMAP_NEAREST, GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR};
GL_NEAREST,
GL_NEAREST_MIPMAP_NEAREST,
GL_NEAREST_MIPMAP_LINEAR,
GL_NEAREST,
GL_LINEAR,
GL_LINEAR_MIPMAP_NEAREST,
GL_LINEAR_MIPMAP_LINEAR,
GL_LINEAR
};
const GLint c_WrapSettings[4] = { GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT, GL_REPEAT };

View File

@ -28,6 +28,7 @@
#include "StringUtil.h"
#include "Render.h"
#include "VertexShader.h"
#include "VertexManager.h"
#include "VertexLoader.h"
#include "BPStructs.h"
@ -55,7 +56,6 @@ static int colIndex;
#define inline
#endif
TVtxDesc VertexManager::s_GlobalVtxDesc;
// ==============================================================================
// Direct
@ -120,7 +120,7 @@ VertexLoader::VertexLoader()
{
m_numPipelineStages = 0;
m_VertexSize = 0;
m_AttrDirty = 1;
m_AttrDirty = AD_DIRTY;
VertexLoader_Normal::Init();
m_compiledCode = (u8 *)AllocateExecutableMemory(COMPILED_CODE_SIZE, false);
@ -136,7 +136,7 @@ VertexLoader::~VertexLoader()
int VertexLoader::ComputeVertexSize()
{
if (!m_AttrDirty) {
if (m_AttrDirty == AD_CLEAN) {
// Compare the 33 desc bits.
if (m_VtxDesc.Hex0 == VertexManager::GetVtxDesc().Hex0 &&
(m_VtxDesc.Hex1 & 1) == (VertexManager::GetVtxDesc().Hex1 & 1))
@ -152,7 +152,7 @@ int VertexLoader::ComputeVertexSize()
if (fnSetupVertexPointers != NULL && fnSetupVertexPointers == (void (*)())(void*)m_compiledCode)
VertexManager::Flush();
m_AttrDirty = 1;
m_AttrDirty = AD_DIRTY;
m_VertexSize = 0;
// Position Matrix Index
if (m_VtxDesc.PosMatIdx)
@ -257,6 +257,7 @@ int VertexLoader::ComputeVertexSize()
return m_VertexSize;
}
// Note the use of CallCdeclFunction3I etc.
// This is a horrible hack that is necessary because in 64-bit mode, Opengl32.dll is based way, way above the 32-bit
// address space that is within reach of a CALL, and just doing &fn gives us these high uncallable addresses. So we
@ -269,20 +270,24 @@ DECLARE_IMPORT(glVertexPointer);
DECLARE_IMPORT(glColorPointer);
DECLARE_IMPORT(glTexCoordPointer);
void VertexLoader::ProcessFormat()
void VertexLoader::PrepareForVertexFormat()
{
using namespace Gen;
//_assert_( VertexManager::s_pCurBufferPointer == s_pBaseBufferPointer );
if (!m_AttrDirty)
if (m_AttrDirty == AD_CLEAN)
{
// Check if local cached desc (in this VL) matches global desc
if (m_VtxDesc.Hex0 == VertexManager::GetVtxDesc().Hex0 && (m_VtxDesc.Hex1 & 1)==(VertexManager::GetVtxDesc().Hex1 & 1))
return; // same
{
return; // same
}
}
else
m_AttrDirty = 0;
else
{
m_AttrDirty = AD_CLEAN;
}
m_VtxDesc.Hex = VertexManager::GetVtxDesc().Hex;
DVSTARTPROFILE();
@ -315,7 +320,7 @@ void VertexLoader::ProcessFormat()
m_VBVertexStride += 12;
switch (m_VtxDesc.Position) {
case NOT_PRESENT: {_assert_msg_(0,"Vertex descriptor without position!","WTF?");} break;
case NOT_PRESENT: {_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break;
case DIRECT:
{
switch (m_VtxAttr.PosFormat) {
@ -409,7 +414,6 @@ void VertexLoader::ProcessFormat()
m_VBVertexStride += 6; // still include the texture coordinate, but this time as 6 bytes
m_components |= VB_HAS_UV0 << i; // have to include since using now
}
}
else {
if (tc[i] != NOT_PRESENT)
@ -425,8 +429,7 @@ void VertexLoader::ProcessFormat()
break;
}
}
if (j == 8 && !((m_components&VB_HAS_TEXMTXIDXALL)&(VB_HAS_TEXMTXIDXALL<<(i+1)))) // no more tex coords and tex matrices, so exit loop
if (j == 8 && !((m_components&VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL<<(i+1)))) // no more tex coords and tex matrices, so exit loop
break;
}
}
@ -438,18 +441,18 @@ void VertexLoader::ProcessFormat()
if (m_VBVertexStride & 3) {
// make sure all strides are at least divisible by 4 (some gfx cards experience a 3x speed boost)
m_VBStridePad = 4 - (m_VBVertexStride&3);
m_VBStridePad = 4 - (m_VBVertexStride & 3);
m_VBVertexStride += m_VBStridePad;
}
// compile the pointer set function
// compile the pointer set function - why?
u8 *old_code_ptr = GetWritableCodePtr();
SetCodePtr(m_compiledCode);
Util::EmitPrologue(6);
int offset = 0;
// Position
if (m_VtxDesc.Position != NOT_PRESENT) {
if (m_VtxDesc.Position != NOT_PRESENT) { // TODO: Why the check? Always present, AFAIK!
CallCdeclFunction4_I(glVertexPointer, 3, GL_FLOAT, m_VBVertexStride, offset);
offset += 12;
}
@ -484,6 +487,8 @@ void VertexLoader::ProcessFormat()
}
}
// TODO : With byte or short normals above, offset will be misaligned (not 4byte aligned)! Ugh!
for (int i = 0; i < 2; i++) {
if (col[i] != NOT_PRESENT) {
if (i)
@ -496,9 +501,8 @@ void VertexLoader::ProcessFormat()
// TextureCoord
for (int i = 0; i < 8; i++) {
if (tc[i] != NOT_PRESENT || (m_components&(VB_HAS_TEXMTXIDX0<<i))) {
int id = GL_TEXTURE0+i;
if (tc[i] != NOT_PRESENT || (m_components & (VB_HAS_TEXMTXIDX0 << i))) {
int id = GL_TEXTURE0 + i;
#ifdef _M_X64
#ifdef _MSC_VER
MOV(32, R(RCX), Imm32(id));
@ -517,6 +521,7 @@ void VertexLoader::ProcessFormat()
ABI_RestoreStack(1 * 4);
#endif
#endif
// TODO : More potential disalignment!
if (m_components & (VB_HAS_TEXMTXIDX0 << i)) {
if (tc[i] != NOT_PRESENT) {
CallCdeclFunction4_I(glTexCoordPointer, 3, GL_FLOAT, m_VBVertexStride, offset);
@ -528,14 +533,14 @@ void VertexLoader::ProcessFormat()
}
}
else {
CallCdeclFunction4_I(glTexCoordPointer, m_VtxAttr.texCoord[i].Elements?2:1, GL_FLOAT, m_VBVertexStride, offset);
CallCdeclFunction4_I(glTexCoordPointer, m_VtxAttr.texCoord[i].Elements ? 2 : 1, GL_FLOAT, m_VBVertexStride, offset);
offset += 4 * (m_VtxAttr.texCoord[i].Elements?2:1);
}
}
}
if (m_VtxDesc.PosMatIdx) {
CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_POSMTX_ATTRIB,1,GL_UNSIGNED_BYTE, GL_FALSE, m_VBVertexStride, offset);
CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_POSMTX_ATTRIB, 1, GL_UNSIGNED_BYTE, GL_FALSE, m_VBVertexStride, offset);
offset += 1;
}
@ -551,19 +556,6 @@ void VertexLoader::ProcessFormat()
SetCodePtr(old_code_ptr);
}
// Caches the per-run decoding parameters taken from the current vertex
// attribute table: the position scale, the texcoord scales (only when the
// loader has any UV component) and the element count of both color channels.
void VertexLoader::PrepareRun()
{
	posScale = shiftLookup[m_VtxAttr.PosFrac];

	if (m_components & VB_HAS_UVALL) {
		// U and V of a texcoord share the same fractional shift.
		int coordIdx = 0;
		while (coordIdx < 8) {
			tcScaleU[coordIdx] = shiftLookup[m_VtxAttr.texCoord[coordIdx].Frac];
			tcScaleV[coordIdx] = shiftLookup[m_VtxAttr.texCoord[coordIdx].Frac];
			++coordIdx;
		}
	}

	colElements[0] = m_VtxAttr.color[0].Elements;
	colElements[1] = m_VtxAttr.color[1].Elements;
}
void VertexLoader::SetupColor(int num, int mode, int format, int elements)
{
// if COL0 not present, then embed COL1 into COL0
@ -669,38 +661,49 @@ void VertexLoader::RunVertices(int primitive, int count)
{
DVSTARTPROFILE();
ComputeVertexSize(); // HACK for underruns in Super Monkey Ball etc. !!!! dirty handling must be wrong.
if (count <= 0)
return;
// This has dirty handling - won't actually recompute unless necessary.
ComputeVertexSize();
// Figure out a better check. Also, jitting fnSetupVertexPointers seems pretty silly - not likely to be a bottleneck.
if (fnSetupVertexPointers != NULL && fnSetupVertexPointers != (void (*)())(void*)m_compiledCode)
VertexManager::Flush();
if (bpmem.genMode.cullmode == 3 && primitive < 5)
{
// if cull mode is none, ignore triangles and quads
DataSkip(count*m_VertexSize);
DataSkip(count * m_VertexSize);
return;
}
ProcessFormat();
fnSetupVertexPointers = (void (*)())(void*)m_compiledCode;
// This has dirty handling - won't actually recompute unless necessary.
PrepareForVertexFormat();
fnSetupVertexPointers = (void (*)())(void*)m_compiledCode;
VertexManager::EnableComponents(m_components);
PrepareRun();
// Load position and texcoord scale factors.
// Hm, this could be done when the VtxAttr is set, instead.
posScale = shiftLookup[m_VtxAttr.PosFrac];
if (m_components & VB_HAS_UVALL) {
for (int i = 0; i < 8; i++) {
tcScaleU[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
tcScaleV[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
}
}
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
int granularity = 1;
switch(primitive) {
switch (primitive) {
case 3: // strip
case 4: // fan
if (VertexManager::GetRemainingSize() < 3*m_VBVertexStride )
if (VertexManager::GetRemainingSize() < 3 * m_VBVertexStride )
VertexManager::Flush();
break;
case 6: // line strip
if (VertexManager::GetRemainingSize() < 2*m_VBVertexStride )
if (VertexManager::GetRemainingSize() < 2 * m_VBVertexStride )
VertexManager::Flush();
break;
case 0: // quads

View File

@ -64,7 +64,11 @@ private:
void SetupTexCoord(int num, int _iMode, int _iFormat, int _iElements, int _iFrac);
// The 3 possible values (0, 1, 2) should be documented here.
int m_AttrDirty;
enum {
AD_CLEAN = 0,
AD_DIRTY = 1,
AD_VAT_DIRTY = 2,
} m_AttrDirty;
public:
// constructor
@ -72,23 +76,19 @@ public:
~VertexLoader();
// run the pipeline
void ProcessFormat();
void PrepareRun();
void PrepareForVertexFormat();
void RunVertices(int primitive, int count);
void WriteCall(void (LOADERDECL *func)(void *));
int GetGCVertexSize() const { _assert_( !m_AttrDirty ); return m_VertexSize; }
int GetVBVertexStride() const { _assert_( !m_AttrDirty); return m_VBVertexStride; }
int GetGCVertexSize() const { _assert_( !m_AttrDirty ); return m_VertexSize; }
int GetVBVertexStride() const { _assert_( !m_AttrDirty); return m_VBVertexStride; }
int ComputeVertexSize();
// SetVAT_group
// ignore PosFrac, texCoord[i].Frac
void SetVAT_group0(u32 _group0)
{
if ((m_group0.Hex & ~0x3e0001f0) != (_group0 & ~0x3e0001f0)) {
m_AttrDirty = 2;
m_AttrDirty = AD_VAT_DIRTY;
}
m_group0.Hex = _group0;
@ -111,10 +111,9 @@ public:
void SetVAT_group1(u32 _group1)
{
if ((m_group1.Hex & ~0x7c3e1f0) != (_group1 & ~0x7c3e1f0)) {
m_AttrDirty = 2;
m_AttrDirty = AD_VAT_DIRTY;
}
m_group1.Hex = _group1;
m_VtxAttr.texCoord[1].Elements = m_group1.Tex1CoordElements;
m_VtxAttr.texCoord[1].Format = m_group1.Tex1CoordFormat;
@ -132,7 +131,7 @@ public:
void SetVAT_group2(u32 _group2)
{
if ((m_group2.Hex & ~0xf87c3e1f) != (_group2 & ~0xf87c3e1f)) {
m_AttrDirty = 2;
m_AttrDirty = AD_VAT_DIRTY;
}
m_group2.Hex = _group2;

View File

@ -3,6 +3,7 @@
#include <fstream>
#include <vector>
#include "Statistics.h"
#include "MemoryUtil.h"
#include "Profiler.h"
#include "Render.h"
@ -11,6 +12,7 @@
#include "TextureMngr.h"
#include "PixelShaderManager.h"
#include "VertexShaderManager.h"
#include "VertexShader.h"
#include "VertexLoader.h"
#include "VertexManager.h"
@ -23,6 +25,7 @@ static vector< pair<int, int> > s_vStoredPrimitives; // every element, mode and
static u32 s_prevcomponents; // previous state set
u8* VertexManager::s_pCurBufferPointer = NULL;
TVtxDesc VertexManager::s_GlobalVtxDesc;
static const GLenum c_primitiveType[8] =
{
@ -294,32 +297,32 @@ void VertexManager::Flush()
ResetBuffer();
}
void VertexManager::LoadCPReg(u32 SubCmd, u32 Value)
void VertexManager::LoadCPReg(u32 sub_cmd, u32 value)
{
switch (SubCmd & 0xF0)
switch (sub_cmd & 0xF0)
{
case 0x30:
VertexShaderMngr::SetTexMatrixChangedA(Value);
VertexShaderMngr::SetTexMatrixChangedA(value);
break;
case 0x40:
VertexShaderMngr::SetTexMatrixChangedB(Value);
VertexShaderMngr::SetTexMatrixChangedB(value);
break;
case 0x50:
s_GlobalVtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits
s_GlobalVtxDesc.Hex |= Value;
s_GlobalVtxDesc.Hex |= value;
break;
case 0x60:
s_GlobalVtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits
s_GlobalVtxDesc.Hex |= (u64)Value << 17;
s_GlobalVtxDesc.Hex |= (u64)value << 17;
break;
case 0x70: g_VertexLoaders[SubCmd & 7].SetVAT_group0(Value); _assert_((SubCmd & 0x0F) < 8); break;
case 0x80: g_VertexLoaders[SubCmd & 7].SetVAT_group1(Value); _assert_((SubCmd & 0x0F) < 8); break;
case 0x90: g_VertexLoaders[SubCmd & 7].SetVAT_group2(Value); _assert_((SubCmd & 0x0F) < 8); break;
case 0x70: g_VertexLoaders[sub_cmd & 7].SetVAT_group0(value); _assert_((sub_cmd & 0x0F) < 8); break;
case 0x80: g_VertexLoaders[sub_cmd & 7].SetVAT_group1(value); _assert_((sub_cmd & 0x0F) < 8); break;
case 0x90: g_VertexLoaders[sub_cmd & 7].SetVAT_group2(value); _assert_((sub_cmd & 0x0F) < 8); break;
case 0xA0: arraybases[SubCmd & 0xF] = Value & 0xFFFFFFFF; break;
case 0xB0: arraystrides[SubCmd & 0xF] = Value & 0xFF; break;
case 0xA0: arraybases[sub_cmd & 0xF] = value & 0xFFFFFFFF; break;
case 0xB0: arraystrides[sub_cmd & 0xF] = value & 0xFF; break;
}
}

View File

@ -1,468 +0,0 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Globals.h"
#include "Profiler.h"
#include <math.h>
#include "BPStructs.h"
#include "VertexShader.h"
// This is the tricky one to get rid of.
// #include "VertexLoader.h"
static char text[16384];
#define WRITE p+=sprintf
#define LIGHTS_POS ""
char *GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha);
// Builds the source text of a vertex shader for the current XF/BP register
// state and returns it.
//
//   components         - VB_HAS_* bit mask of the attributes present in the
//                        vertex stream; decides which shader inputs are declared.
//   has_zbuffer_target - when true (or when bpmem.ztex2.op is ZTEXTURE_ADD) the
//                        shader also outputs z/w in the .w of a texcoord.
//
// The returned pointer refers to the file-static `text` buffer, so it is
// overwritten by the next call. A canary byte at the end of the buffer is
// checked after generation; note that this only detects an overflow after it
// has happened.
//
// The emitted program has four parts: type/uniform declarations, position and
// normal transform, per-channel lighting, and texcoord generation.
char *GenerateVertexShader(u32 components, bool has_zbuffer_target)
{
	text[sizeof(text) - 1] = 0x7C; // canary
	DVSTARTPROFILE();

	_assert_( bpmem.genMode.numtexgens == xfregs.numTexGens);
	_assert_( bpmem.genMode.numcolchans == xfregs.nNumChans);

	bool bOutputZ = bpmem.ztex2.op==ZTEXTURE_ADD || has_zbuffer_target;
	int ztexcoord = -1; // index of the texcoord carrying z in .w, -1 if none

	char *p = text;
	WRITE(p, "//Vertex Shader: comp:%x, \n", components);
	WRITE(p, "typedef struct {\n"
		" float4 T0, T1, T2;\n"
		" float4 N0, N1, N2;\n"
		"} s_"I_POSNORMALMATRIX";\n\n"
		"typedef struct {\n"
		" float4 t;\n"
		"} FLT4;\n"
		"typedef struct {\n"
		" FLT4 T[24];\n"
		"} s_"I_TEXMATRICES";\n\n"
		"typedef struct {\n"
		" FLT4 T[64];\n"
		"} s_"I_TRANSFORMMATRICES";\n\n"
		"typedef struct {\n"
		" FLT4 T[32];\n"
		"} s_"I_NORMALMATRICES";\n\n"
		"typedef struct {\n"
		" FLT4 T[64];\n"
		"} s_"I_POSTTRANSFORMMATRICES";\n\n"
		"typedef struct {\n"
		" float4 col;\n"
		" float4 cosatt;\n"
		" float4 distatt;\n"
		" float4 pos;\n"
		" float4 dir;\n"
		"} Light;\n\n"
		"typedef struct {\n"
		" Light lights[8];\n"
		"} s_"I_LIGHTS";\n\n"
		"typedef struct {\n"
		" float4 C0, C1, C2, C3;\n"
		"} s_"I_MATERIALS";\n\n"
		"typedef struct {\n"
		" float4 T0,T1,T2,T3;\n"
		"} s_"I_PROJECTION";\n"
		"typedef struct {\n"
		" float4 params;\n" // a, b, c, b_shift
		"} s_"I_FOGPARAMS";\n\n");

	WRITE(p, "struct VS_OUTPUT {\n");
	WRITE(p, " float4 pos : POSITION;\n");
	WRITE(p, " float4 colors[2] : COLOR0;\n");

	// if outputting Z, embed the Z coordinate in the w component of a texture coordinate
	// if number of tex gens occupies all the texture coordinates, use the last tex coord
	// otherwise use the next available tex coord
	for (int i = 0; i < xfregs.numTexGens; ++i) {
		WRITE(p, " float%d tex%d : TEXCOORD%d;\n", (i==(xfregs.numTexGens-1)&&bOutputZ)?4:3, i, i);
	}
	if (bOutputZ && xfregs.numTexGens == 0) {
		ztexcoord = 0;
		WRITE(p, " float4 tex%d : TEXCOORD%d;\n", ztexcoord, ztexcoord);
	}
	else if (bOutputZ)
		ztexcoord = xfregs.numTexGens - 1;

	WRITE(p, "};\n");
	WRITE(p, "\n");

	// uniforms
	// bool bTexMtx = ((components & VB_HAS_TEXMTXIDXALL)<<VB_HAS_UVTEXMTXSHIFT)!=0; unused TODO: keep?
	WRITE(p, "uniform s_"I_TRANSFORMMATRICES" "I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES);
	WRITE(p, "uniform s_"I_TEXMATRICES" "I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); // also using tex matrices
	WRITE(p, "uniform s_"I_NORMALMATRICES" "I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES);
	WRITE(p, "uniform s_"I_POSNORMALMATRIX" "I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX);
	WRITE(p, "uniform s_"I_POSTTRANSFORMMATRICES" "I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES);
	WRITE(p, "uniform s_"I_LIGHTS" "I_LIGHTS" : register(c%d);\n", C_LIGHTS);
	WRITE(p, "uniform s_"I_MATERIALS" "I_MATERIALS" : register(c%d);\n", C_MATERIALS);
	WRITE(p, "uniform s_"I_PROJECTION" "I_PROJECTION" : register(c%d);\n", C_PROJECTION);
	WRITE(p, "uniform s_"I_FOGPARAMS" "I_FOGPARAMS" : register(c%d);\n", C_FOGPARAMS);

	WRITE(p, "VS_OUTPUT main(\n");

	// inputs
	if (components & VB_HAS_NRM0)
		WRITE(p, " float3 rawnorm0 : NORMAL,\n");
	if (components & VB_HAS_NRM1)
		WRITE(p, " float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB);
	if (components & VB_HAS_NRM2)
		WRITE(p, " float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB);
	if (components & VB_HAS_COL0)
		WRITE(p, " float4 color0 : COLOR0,\n");
	if (components & VB_HAS_COL1)
		WRITE(p, " float4 color1 : COLOR1,\n");
	for (int i = 0; i < 8; ++i) {
		// A texcoord that carries a matrix index is widened to float3 (index in .z).
		u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
		if ((components & (VB_HAS_UV0<<i)) || hastexmtx )
			WRITE(p, " float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i,i);
	}
	if (components & VB_HAS_POSMTXIDX)
		WRITE(p, " half posmtx : ATTR%d,\n", SHADER_POSMTX_ATTRIB);

	WRITE(p, " float4 rawpos : POSITION) {\n");
	WRITE(p, "VS_OUTPUT o;\n");

	// transforms
	if (components & VB_HAS_POSMTXIDX) {
		// Per-vertex matrix index: fetch rows from the big matrix arrays.
		WRITE(p, "float4 pos = float4(dot("I_TRANSFORMMATRICES".T[posmtx].t, rawpos), dot("I_TRANSFORMMATRICES".T[posmtx+1].t, rawpos), dot("I_TRANSFORMMATRICES".T[posmtx+2].t, rawpos),1);\n");

		if (components & VB_HAS_NRMALL) {
			WRITE(p, "int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
			WRITE(p, "float3 N0 = "I_NORMALMATRICES".T[normidx].t.xyz, N1 = "I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = "I_NORMALMATRICES".T[normidx+2].t.xyz;\n");
		}

		if (components & VB_HAS_NRM0)
			WRITE(p, "half3 _norm0 = half3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0));\n"
				"half3 norm0 = normalize(_norm0);\n");
		if (components & VB_HAS_NRM1)
			WRITE(p, "half3 _norm1 = half3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
			//"half3 norm1 = normalize(_norm1);\n");
		if (components & VB_HAS_NRM2)
			WRITE(p, "half3 _norm2 = half3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
			//"half3 norm2 = normalize(_norm2);\n");
	}
	else {
		// Single position/normal matrix shared by the whole batch.
		WRITE(p, "float4 pos = float4(dot("I_POSNORMALMATRIX".T0, rawpos), dot("I_POSNORMALMATRIX".T1, rawpos), dot("I_POSNORMALMATRIX".T2, rawpos), 1);\n");
		if (components & VB_HAS_NRM0)
			WRITE(p, "half3 _norm0 = half3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm0));\n"
				"half3 norm0 = normalize(_norm0);\n");
		if (components & VB_HAS_NRM1)
			WRITE(p, "half3 _norm1 = half3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n");
			//"half3 norm1 = normalize(_norm1);\n");
		if (components & VB_HAS_NRM2)
			WRITE(p, "half3 _norm2 = half3(dot("I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot("I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot("I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n");
			//"half3 norm2 = normalize(_norm2);\n");
	}

	// The lighting code below references norm0 unconditionally, so declare a
	// zero normal when the vertex stream has none.
	if (!(components & VB_HAS_NRM0))
		WRITE(p, "half3 _norm0 = half3(0,0,0), norm0= half3(0,0,0);\n");

	WRITE(p, "o.pos = float4(dot("I_PROJECTION".T0, pos), dot("I_PROJECTION".T1, pos), dot("I_PROJECTION".T2, pos), dot("I_PROJECTION".T3, pos));\n");

	WRITE(p, "half4 mat, lacc;\n"
		"half3 ldir, h;\n"
		"half dist, dist2, attn;\n");

	// lights/colors
	for (int j = 0; j < xfregs.nNumChans; j++) {
		// bool bColorAlphaSame = xfregs.colChans[j].color.hex == xfregs.colChans[j].alpha.hex; unused
		const LitChannel& color = xfregs.colChans[j].color;
		const LitChannel& alpha = xfregs.colChans[j].alpha;

		WRITE(p, "{\n");

		// Material color (rgb + a) ...
		if (color.matsource) {// from vertex
			if (components & (VB_HAS_COL0<<j) )
				WRITE(p, "mat = color%d;\n", j);
			else WRITE(p, "mat = half4(1,1,1,1);\n");
		}
		else // from color
			WRITE(p, "mat = "I_MATERIALS".C%d;\n", j+2);

		// ... and ambient term, which seeds the light accumulator.
		if (color.enablelighting) {
			if (color.ambsource) {// from vertex
				if (components & (VB_HAS_COL0<<j) )
					WRITE(p, "lacc = color%d;\n", j);
				else WRITE(p, "lacc = half4(0.0f,0.0f,0.0f,0.0f);\n");
			}
			else // from color
				WRITE(p, "lacc = "I_MATERIALS".C%d;\n", j);
		}

		// check if alpha is different
		if (alpha.matsource != color.matsource) {
			if (alpha.matsource) {// from vertex
				if (components & (VB_HAS_COL0<<j) )
					WRITE(p, "mat.w = color%d.w;\n", j);
				else WRITE(p, "mat.w = 1;\n");
			}
			else // from color
				WRITE(p, "mat.w = "I_MATERIALS".C%d.w;\n", j+2);
		}

		if (alpha.enablelighting && alpha.ambsource != color.ambsource) {
			if (alpha.ambsource) {// from vertex
				if (components & (VB_HAS_COL0<<j) )
					WRITE(p, "lacc.w = color%d.w;\n", j);
				else WRITE(p, "lacc.w = 0;\n");
			}
			else // from color
				WRITE(p, "lacc.w = "I_MATERIALS".C%d.w;\n", j);
		}

		if (color.enablelighting && alpha.enablelighting && (color.GetFullLightMask() != alpha.GetFullLightMask() || color.lightparams != alpha.lightparams)) {
			// both have lighting, except not using the same lights
			int mask = 0; // holds already computed lights

			if (color.lightparams == alpha.lightparams && (color.GetFullLightMask() & alpha.GetFullLightMask())) {
				// if lights are shared, compute those first
				mask = color.GetFullLightMask() & alpha.GetFullLightMask();
				for (int i = 0; i < 8; ++i) {
					if (mask&(1<<i))
						p = GenerateLightShader(p, i, color, "lacc", 3);
				}
			}

			// no shared lights
			for (int i = 0; i < 8; ++i) {
				if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)) )
					p = GenerateLightShader(p, i, color, "lacc", 1);
				if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)) )
					p = GenerateLightShader(p, i, alpha, "lacc", 2);
			}
		}
		else if (color.enablelighting || alpha.enablelighting) {
			// Either only one of the two is lit, or both are lit with identical
			// masks and parameters. Select lights from the mask of the channel
			// that is actually handed to the generator; previously color's mask
			// was tested even when alpha was the only lit channel.
			const LitChannel& lit = color.enablelighting ? color : alpha;
			const int litMask = lit.GetFullLightMask();
			int coloralpha = (int)color.enablelighting|((int)alpha.enablelighting<<1);
			for (int i = 0; i < 8; ++i) {
				if (litMask & (1<<i) )
					p = GenerateLightShader(p, i, lit, "lacc", coloralpha);
			}
		}

		// Combine material with the (clamped) accumulated light.
		if (color.enablelighting != alpha.enablelighting) {
			if (color.enablelighting )
				WRITE(p, "o.colors[%d].xyz = mat.xyz * clamp(lacc.xyz,float3(0.0f,0.0f,0.0f),float3(1.0f,1.0f,1.0f));\n"
					"o.colors[%d].w = mat.w;\n", j, j);
			else
				WRITE(p, "o.colors[%d].xyz = mat.xyz;\n"
					"o.colors[%d].w = mat.w * clamp(lacc.w,0.0f,1.0f);\n", j, j);
		}
		else {
			if (alpha.enablelighting )
				WRITE(p, "o.colors[%d] = mat * clamp(lacc,float4(0.0f,0.0f,0.0f,0.0f), float4(1.0f,1.0f,1.0f,1.0f));\n", j);
			else WRITE(p, "o.colors[%d] = mat;\n", j);
		}
		WRITE(p, "}\n");
	}

	// zero left over channels
	for (int i = xfregs.nNumChans; i < 2; ++i)
		WRITE(p, "o.colors[%d] = 0;\n", i);

	// transform texcoords
	for (int i = 0; i < xfregs.numTexGens; ++i) {
		TexMtxInfo& texinfo = xfregs.texcoords[i].texmtxinfo;

		WRITE(p, "{\n");

		// Pick the source row that feeds this texgen.
		switch (texinfo.sourcerow) {
		case XF_SRCGEOM_INROW:
			_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
			WRITE(p, "float4 coord = rawpos;\n"); // pos.w is 1
			break;
		case XF_SRCNORMAL_INROW:
			if (components & VB_HAS_NRM0) {
				_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
				WRITE(p, "float4 coord = float4(rawnorm0.xyz, 1.0);\n");
			}
			else WRITE(p, "float4 coord = 0;\n");
			break;
		case XF_SRCCOLORS_INROW:
			// No coord is declared here: the color texgen types below read
			// o.colors directly.
			_assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 );
			break;
		case XF_SRCBINORMAL_T_INROW:
			if (components & VB_HAS_NRM1) {
				_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
				WRITE(p, "float4 coord = float4(rawnorm1.xyz, 1.0);\n");
			}
			else WRITE(p, "float4 coord = 0;\n");
			break;
		case XF_SRCBINORMAL_B_INROW:
			if (components & VB_HAS_NRM2) {
				_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
				WRITE(p, "float4 coord = float4(rawnorm2.xyz, 1.0);\n");
			}
			else WRITE(p, "float4 coord = 0;\n");
			break;
		default:
			_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
			if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) )
				WRITE(p, "float4 coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
			else
				WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
			break;
		}

		// first transformation
		switch (texinfo.texgentype) {
		case XF_TEXGEN_REGULAR:
			if (components & (VB_HAS_TEXMTXIDX0<<i)) {
				// Matrix index comes from the vertex (stored in tex.z).
				if (texinfo.projection == XF_TEXPROJ_STQ )
					WRITE(p, "o.tex%d.xyz = float3(dot(coord, "I_TRANSFORMMATRICES".T[tex%d.z].t), dot(coord, "I_TRANSFORMMATRICES".T[tex%d.z+1].t), dot(coord, "I_TRANSFORMMATRICES".T[tex%d.z+2].t));\n", i, i, i, i);
				else {
					WRITE(p, "o.tex%d.xyz = float3(dot(coord, "I_TRANSFORMMATRICES".T[tex%d.z].t), dot(coord, "I_TRANSFORMMATRICES".T[tex%d.z+1].t), 1);\n", i, i, i);
				}
			}
			else {
				if (texinfo.projection == XF_TEXPROJ_STQ )
					WRITE(p, "o.tex%d.xyz = float3(dot(coord, "I_TEXMATRICES".T[%d].t), dot(coord, "I_TEXMATRICES".T[%d].t), dot(coord, "I_TEXMATRICES".T[%d].t));\n", i, 3*i, 3*i+1, 3*i+2);
				else
					WRITE(p, "o.tex%d.xyz = float3(dot(coord, "I_TEXMATRICES".T[%d].t), dot(coord, "I_TEXMATRICES".T[%d].t), 1);\n", i, 3*i, 3*i+1);
			}
			break;
		case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
			if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) {
				// transform the light dir into tangent space
				WRITE(p, "ldir = normalize("I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift);
				WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift);
			}
			else _assert_(0); // should have normals
			break;
		case XF_TEXGEN_COLOR_STRGBC0:
			_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
			WRITE(p, "o.tex%d.xyz = float3(o.colors[0].x, o.colors[0].y, 1);\n", i);
			break;
		case XF_TEXGEN_COLOR_STRGBC1:
			_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
			WRITE(p, "o.tex%d.xyz = float3(o.colors[1].x, o.colors[1].y, 1);\n", i);
			break;
		}

		if(xfregs.bEnableDualTexTransform && texinfo.texgentype == XF_TEXGEN_REGULAR) { // only works for regular tex gen types?
			if (xfregs.texcoords[i].postmtxinfo.normalize)
				WRITE(p, "o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);

			//multiply by postmatrix
			int postidx = xfregs.texcoords[i].postmtxinfo.index;
			// Row indices wrap at 64, the size of the post-transform array.
			WRITE(p, "float4 P0 = "I_POSTTRANSFORMMATRICES".T[%d].t;\n"
				"float4 P1 = "I_POSTTRANSFORMMATRICES".T[%d].t;\n"
				"float4 P2 = "I_POSTTRANSFORMMATRICES".T[%d].t;\n",
				postidx&0x3f, (postidx+1)&0x3f, (postidx+2)&0x3f);
			WRITE(p, "o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
		}

		WRITE(p, "}\n");
	}

	if (ztexcoord >= 0 )
		WRITE(p, "o.tex%d.w = o.pos.z/o.pos.w;\n", ztexcoord);

	// if (bpmem.fog.c_proj_fsel.fsel != 0) {
	// switch (bpmem.fog.c_proj_fsel.fsel) {
	// case 1: // linear
	// break;
	// case 4: // exp
	// break;
	// case 5: // exp2
	// break;
	// case 6: // backward exp
	// break;
	// case 7: // backward exp2
	// break;
	// }
	//
	// WRITE(p, "o.fog = o.pos.z/o.pos.w;\n");
	// }

	WRITE(p, "return o;\n}\n");

	if (text[sizeof(text) - 1] != 0x7C)
		PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");

	return text;
}
// coloralpha - 1 if color, 2 if alpha
char* GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha)
{
const char* swizzle = "xyzw";
if (coloralpha == 1 ) swizzle = "xyz";
else if (coloralpha == 2 ) swizzle = "w";
if (!(chan.attnfunc & 1)) {
// atten disabled
switch (chan.diffusefunc) {
case LIGHTDIF_NONE:
WRITE(p, "%s.%s += "I_LIGHTS".lights[%d].col.%s;\n", dest, swizzle, index, swizzle);
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
WRITE(p, "ldir = normalize("I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", index);
WRITE(p, "%s.%s += %sdot(ldir, norm0)) * "I_LIGHTS".lights[%d].col.%s;\n",
dest, swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", index, swizzle);
break;
default: _assert_(0);
}
}
else { // spec and spot
WRITE(p, "ldir = "I_LIGHTS".lights[%d].pos.xyz - pos.xyz;\n", index);
if (chan.attnfunc == 3) { // spot
WRITE(p, "dist2 = dot(ldir, ldir);\n"
"dist = sqrt(dist2);\n"
"ldir = ldir / dist;\n"
"attn = max(0.0f, dot(ldir, "I_LIGHTS".lights[%d].dir.xyz));\n",index);
WRITE(p, "attn = max(0.0f, dot("I_LIGHTS".lights[%d].cosatt.xyz, half3(1, attn, attn*attn))) / dot("I_LIGHTS".lights[%d].distatt.xyz, half3(1,dist,dist2));\n", index, index);
}
else if (chan.attnfunc == 1) { // specular
WRITE(p, "attn = dot(norm0, "I_LIGHTS".lights[%d].pos.xyz) > 0 ? max(0.0f, dot(norm0, "I_LIGHTS".lights[%d].dir.xyz)) : 0;\n", index, index);
WRITE(p, "ldir = half3(1,attn,attn*attn);\n");
WRITE(p, "attn = max(0.0f, dot("I_LIGHTS".lights[%d].cosatt.xyz, ldir)) / dot("I_LIGHTS".lights[%d].distatt.xyz, ldir);\n", index, index);
}
switch (chan.diffusefunc) {
case LIGHTDIF_NONE:
WRITE(p, "%s.%s += attn * "I_LIGHTS".lights[%d].col.%s;\n", dest, swizzle, index, swizzle);
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
WRITE(p, "%s.%s += attn * %sdot(ldir, norm0)) * "I_LIGHTS".lights[%d].col.%s;\n",
dest, swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", index, swizzle);
break;
default: _assert_(0);
}
}
WRITE(p, "\n");
return p;
}

View File

@ -1,83 +0,0 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef GCOGL_VERTEXSHADER_H
#define GCOGL_VERTEXSHADER_H
#include "XFMemory.h"
// Fixed numeric slots for extra per-vertex inputs.
// NOTE(review): presumably generic vertex attribute indices used to feed the
// position matrix index and the second/third normal to the vertex shader -
// confirm against the vertex loader and shader setup code.
#define SHADER_POSMTX_ATTRIB 1
#define SHADER_NORM1_ATTRIB 6
#define SHADER_NORM2_ATTRIB 7
// m_components
// VB_HAS_* presence flags, one bit per vertex component. Each group occupies
// consecutive bits, so every flag is written relative to the first flag of its
// group and every *ALL mask is the OR of the flags it covers.
enum {
    // Position matrix index: bit 1.
    VB_HAS_POSMTXIDX     = 1 << 1,

    // Texture matrix indices 0..7: bits 2..9.
    VB_HAS_TEXMTXIDX0    = 1 << 2,
    VB_HAS_TEXMTXIDX1    = VB_HAS_TEXMTXIDX0 << 1,
    VB_HAS_TEXMTXIDX2    = VB_HAS_TEXMTXIDX0 << 2,
    VB_HAS_TEXMTXIDX3    = VB_HAS_TEXMTXIDX0 << 3,
    VB_HAS_TEXMTXIDX4    = VB_HAS_TEXMTXIDX0 << 4,
    VB_HAS_TEXMTXIDX5    = VB_HAS_TEXMTXIDX0 << 5,
    VB_HAS_TEXMTXIDX6    = VB_HAS_TEXMTXIDX0 << 6,
    VB_HAS_TEXMTXIDX7    = VB_HAS_TEXMTXIDX0 << 7,
    VB_HAS_TEXMTXIDXALL  = VB_HAS_TEXMTXIDX0 | VB_HAS_TEXMTXIDX1 | VB_HAS_TEXMTXIDX2 | VB_HAS_TEXMTXIDX3 |
                           VB_HAS_TEXMTXIDX4 | VB_HAS_TEXMTXIDX5 | VB_HAS_TEXMTXIDX6 | VB_HAS_TEXMTXIDX7,

    //VB_HAS_POS=0, // Implied, it always has pos! don't bother testing

    // Normals 0..2: bits 10..12.
    VB_HAS_NRM0          = 1 << 10,
    VB_HAS_NRM1          = VB_HAS_NRM0 << 1,
    VB_HAS_NRM2          = VB_HAS_NRM0 << 2,
    VB_HAS_NRMALL        = VB_HAS_NRM0 | VB_HAS_NRM1 | VB_HAS_NRM2,

    // Colors 0..1: bits 13..14.
    VB_HAS_COL0          = 1 << 13,
    VB_HAS_COL1          = VB_HAS_COL0 << 1,

    // Texture coordinates 0..7: bits 15..22.
    VB_HAS_UV0           = 1 << 15,
    VB_HAS_UV1           = VB_HAS_UV0 << 1,
    VB_HAS_UV2           = VB_HAS_UV0 << 2,
    VB_HAS_UV3           = VB_HAS_UV0 << 3,
    VB_HAS_UV4           = VB_HAS_UV0 << 4,
    VB_HAS_UV5           = VB_HAS_UV0 << 5,
    VB_HAS_UV6           = VB_HAS_UV0 << 6,
    VB_HAS_UV7           = VB_HAS_UV0 << 7,
    VB_HAS_UVALL         = VB_HAS_UV0 | VB_HAS_UV1 | VB_HAS_UV2 | VB_HAS_UV3 |
                           VB_HAS_UV4 | VB_HAS_UV5 | VB_HAS_UV6 | VB_HAS_UV7,

    // Bit distance between VB_HAS_UVn (bit 15+n) and VB_HAS_TEXMTXIDXn (bit 2+n).
    VB_HAS_UVTEXMTXSHIFT = 13,
};
// shader variables
//
// I_*: names of the constant blocks as they are spelled inside the generated
// shader text. They are string-literal macros so the generator can splice them
// into format strings by literal concatenation (e.g. I_LIGHTS ".lights[%d]").
#define I_POSNORMALMATRIX "cpnmtx"
#define I_PROJECTION "cproj"
#define I_MATERIALS "cmtrl"
#define I_LIGHTS "clights"
#define I_TEXMATRICES "ctexmtx"
#define I_TRANSFORMMATRICES "ctrmtx"
#define I_NORMALMATRICES "cnmtx"
#define I_POSTTRANSFORMMATRICES "cpostmtx"
#define I_FOGPARAMS "cfog"
// C_*: index of the first vertex shader constant of each block. One index step
// is one 4-float constant (SetVSConstant4fv(C_PROJECTION+1, ...) uploads the
// second row of the projection matrix). Each block starts where the previous
// one ends, so the added number is the size of the preceding block:
//   C_POSNORMALMATRIX        =   0  (6 constants)
//   C_PROJECTION             =   6  (4 constants)
//   C_MATERIALS              =  10  (4 constants)
//   C_LIGHTS                 =  14  (40 constants)
//   C_TEXMATRICES            =  54  (24 constants)
//   C_TRANSFORMMATRICES      =  78  (64 constants)
//   C_NORMALMATRICES         = 142  (32 constants)
//   C_POSTTRANSFORMMATRICES  = 174  (64 constants)
//   C_FOGPARAMS              = 238
#define C_POSNORMALMATRIX 0
#define C_PROJECTION (C_POSNORMALMATRIX+6)
#define C_MATERIALS (C_PROJECTION+4)
#define C_LIGHTS (C_MATERIALS+4)
#define C_TEXMATRICES (C_LIGHTS+40)
#define C_TRANSFORMMATRICES (C_TEXMATRICES+24)
#define C_NORMALMATRICES (C_TRANSFORMMATRICES+64)
#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES+32)
#define C_FOGPARAMS (C_POSTTRANSFORMMATRICES+64)
// Generates the source text of a vertex shader and returns a pointer to it.
//   components         - presumably a combination of the VB_HAS_* flags above
//                        (TODO confirm against the callers)
//   has_zbuffer_target - NOTE(review): looks like it selects the extra depth
//                        output written to a texcoord's w component - confirm
// NOTE(review): the definition returns its own `text` buffer, so the result is
// presumably only valid until the next call and must not be freed - confirm.
char *GenerateVertexShader(u32 components, bool has_zbuffer_target);
#endif

View File

@ -15,16 +15,15 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Globals.h"
#include "Profiler.h"
#include <Cg/cg.h>
#include <Cg/cgGL.h>
#include <math.h>
#include "Statistics.h"
#include "ImageWrite.h"
#include "Render.h"
#include "VertexShader.h"
@ -366,7 +365,8 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs)
int overfl;
int xoffs = 0, yoffs = 0;
int wid, hei, actualWid, actualHei;
int winw = nBackbufferWidth; int winh = nBackbufferHeight;
int winw = nBackbufferWidth;
int winh = nBackbufferHeight;
if (g_Config.bKeepAR)
{
// Check if height or width is the limiting factor
@ -405,7 +405,7 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs)
hei = ceil(fabs(2 * rawViewport[1]));
}
if(g_Config.bStretchToFit && g_Config.renderToMainframe)
if (g_Config.bStretchToFit && g_Config.renderToMainframe)
{
glViewport(
(int)(rawViewport[3]-rawViewport[0]-342-scissorXOff) + xoffs,
@ -471,7 +471,7 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs)
}
PRIM_LOG("Projection: %f %f %f %f %f %f\n", rawProjection[0], rawProjection[1], rawProjection[2], rawProjection[3], rawProjection[4], rawProjection[5]);
SetVSConstant4fv(C_PROJECTION, &g_fProjectionMatrix[0]);
SetVSConstant4fv(C_PROJECTION, &g_fProjectionMatrix[0]);
SetVSConstant4fv(C_PROJECTION+1, &g_fProjectionMatrix[4]);
SetVSConstant4fv(C_PROJECTION+2, &g_fProjectionMatrix[8]);
SetVSConstant4fv(C_PROJECTION+3, &g_fProjectionMatrix[12]);
@ -480,8 +480,10 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs)
void VertexShaderMngr::InvalidateXFRange(int start, int end)
{
if( ((u32)start >= (u32)MatrixIndexA.PosNormalMtxIdx*4 && (u32)start < (u32)MatrixIndexA.PosNormalMtxIdx*4+12) ||
((u32)start >= XFMEM_NORMALMATRICES+((u32)MatrixIndexA.PosNormalMtxIdx&31)*3 && (u32)start < XFMEM_NORMALMATRICES+((u32)MatrixIndexA.PosNormalMtxIdx&31)*3+9) ) {
if (((u32)start >= (u32)MatrixIndexA.PosNormalMtxIdx*4 &&
(u32)start < (u32)MatrixIndexA.PosNormalMtxIdx*4 + 12) ||
((u32)start >= XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31)*3 &&
(u32)start < XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31)*3 + 9)) {
bPosNormalMatrixChanged = true;
}
@ -499,7 +501,7 @@ void VertexShaderMngr::InvalidateXFRange(int start, int end)
bTexMatricesChanged[1] = true;
}
if (start < XFMEM_POSMATRICES_END ) {
if (start < XFMEM_POSMATRICES_END) {
if (nTransformMatricesChanged[0] == -1) {
nTransformMatricesChanged[0] = start;
nTransformMatricesChanged[1] = end>XFMEM_POSMATRICES_END?XFMEM_POSMATRICES_END:end;
@ -510,7 +512,7 @@ void VertexShaderMngr::InvalidateXFRange(int start, int end)
}
}
if (start < XFMEM_NORMALMATRICES_END && end > XFMEM_NORMALMATRICES ) {
if (start < XFMEM_NORMALMATRICES_END && end > XFMEM_NORMALMATRICES) {
int _start = start < XFMEM_NORMALMATRICES ? 0 : start-XFMEM_NORMALMATRICES;
int _end = end < XFMEM_NORMALMATRICES_END ? end-XFMEM_NORMALMATRICES : XFMEM_NORMALMATRICES_END-XFMEM_NORMALMATRICES;
@ -524,7 +526,7 @@ void VertexShaderMngr::InvalidateXFRange(int start, int end)
}
}
if (start < XFMEM_POSTMATRICES_END && end > XFMEM_POSTMATRICES ) {
if (start < XFMEM_POSTMATRICES_END && end > XFMEM_POSTMATRICES) {
int _start = start < XFMEM_POSTMATRICES ? XFMEM_POSTMATRICES : start-XFMEM_POSTMATRICES;
int _end = end < XFMEM_POSTMATRICES_END ? end-XFMEM_POSTMATRICES : XFMEM_POSTMATRICES_END-XFMEM_POSTMATRICES;
@ -548,7 +550,7 @@ void VertexShaderMngr::InvalidateXFRange(int start, int end)
}
else {
if (nLightsChanged[0] > _start) nLightsChanged[0] = _start;
if (nLightsChanged[1] < _end) nLightsChanged[1] = _end;
if (nLightsChanged[1] < _end) nLightsChanged[1] = _end;
}
}
}
@ -598,7 +600,6 @@ void VertexShaderMngr::SetProjection(float* _pProjection, int constantIndex)
// LoadXFReg 0x10
void VertexShaderMngr::LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData)
{
u32 address = baseAddress;
for (int i = 0; i < (int)transferSize; i++)
{
@ -644,7 +645,7 @@ void VertexShaderMngr::LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData)
xfregs.hostinfo = *(INVTXSPEC*)&data;
break;
case 0x1009: //GXSetNumChans (no)
if ((u32)xfregs.nNumChans != (data&3) ) {
if ((u32)xfregs.nNumChans != (data&3)) {
VertexManager::Flush();
xfregs.nNumChans = data&3;
}

View File

@ -20,9 +20,6 @@
#include <map>
#include "VertexShader.h"
struct VERTEXSHADER
{
VERTEXSHADER() : glprogid(0) {}
@ -33,6 +30,55 @@ struct VERTEXSHADER
#endif
};
// Identifier of a vertex shader configuration, used as the key of the vertex
// shader cache (std::map<VERTEXSHADERUID, VSCacheEntry>).
//
// Only the first GetNumValues() words of `values` are significant; how many
// that is gets derived from a 4-bit field in values[0]. Words past that count
// are ignored by the comparison operators.
class VERTEXSHADERUID
{
public:
    u32 values[9];

    // Creates an all-zero id.
    VERTEXSHADERUID() {
        memset(values, 0, sizeof(values));
    }

    // Copies every word, including the ones that are currently not significant.
    VERTEXSHADERUID(const VERTEXSHADERUID& r) {
        for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
            values[i] = r.values[i];
    }

    // Number of significant words in `values`:
    //   ceil(numTexGens * 3 / 4) + 3, with numTexGens = bits 23..26 of values[0].
    // The field is 4 bits wide, so the raw formula can yield up to 15 while the
    // array only holds 9 words (enough for 8 tex gens). The result is clamped to
    // the array size so that callers never index past the end of `values`.
    int GetNumValues() const {
        const int capacity = (int)(sizeof(values) / sizeof(values[0]));
        const int wanted = (int)((((values[0] >> 23) & 0xf)*3 + 3)/4 + 3);
        return wanted < capacity ? wanted : capacity;
    }

    // Strict ordering for use as a std::map key: lexicographic comparison of the
    // significant words. values[0] is compared first; once it is known to be
    // equal, both sides have the same significant word count.
    bool operator <(const VERTEXSHADERUID& _Right) const
    {
        if (values[0] < _Right.values[0])
            return true;
        else if (values[0] > _Right.values[0])
            return false;

        int N = GetNumValues();
        for (int i = 1; i < N; ++i) {
            if (values[i] < _Right.values[i])
                return true;
            else if (values[i] > _Right.values[i])
                return false;
        }

        return false;
    }

    // Two ids are equal when all significant words match.
    bool operator ==(const VERTEXSHADERUID& _Right) const
    {
        if (values[0] != _Right.values[0])
            return false;

        int N = GetNumValues();
        for (int i = 1; i < N; ++i) {
            if (values[i] != _Right.values[i])
                return false;
        }

        return true;
    }
};
class VertexShaderMngr
{
struct VSCacheEntry
@ -47,53 +93,7 @@ class VertexShaderMngr
}
};
// Identifier of a vertex shader configuration, used as the key of the vertex
// shader cache map. Only a prefix of `values` is significant; its length is
// derived from a 4-bit field in values[0], and words past it are ignored by
// the comparison operators.
class VERTEXSHADERUID
{
    // Number of significant words in `values`:
    //   ceil(numTexGens * 3 / 4) + 3, with numTexGens = bits 23..26 of values[0].
    // The raw formula can yield up to 15 for a 4-bit field while the array holds
    // 9 words, so the result is clamped to keep the comparison loops in bounds.
    int SignificantCount() const
    {
        const int capacity = (int)(sizeof(values) / sizeof(values[0]));
        const int wanted = (int)((((values[0]>>23)&0xf)*3+3)/4 + 3);
        return wanted < capacity ? wanted : capacity;
    }

public:
    // Creates an all-zero id.
    VERTEXSHADERUID() {
        memset(values, 0, sizeof(values));
    }

    // Copies every word, including the ones that are currently not significant.
    VERTEXSHADERUID(const VERTEXSHADERUID& r) {
        for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
            values[i] = r.values[i];
    }

    // Strict ordering for use as a std::map key: lexicographic comparison of the
    // significant words. values[0] is compared first; once it is known to be
    // equal, both sides have the same significant word count.
    bool operator<(const VERTEXSHADERUID& _Right) const
    {
        if (values[0] < _Right.values[0])
            return true;
        else if (values[0] > _Right.values[0])
            return false;

        int N = SignificantCount();
        for (int i = 1; i < N; ++i) {
            if (values[i] < _Right.values[i])
                return true;
            else if (values[i] > _Right.values[i])
                return false;
        }

        return false;
    }

    // Two ids are equal when all significant words match.
    bool operator==(const VERTEXSHADERUID& _Right) const
    {
        if (values[0] != _Right.values[0])
            return false;

        int N = SignificantCount();
        for (int i = 1; i < N; ++i) {
            if (values[i] != _Right.values[i])
                return false;
        }

        return true;
    }

    u32 values[9];
};
typedef std::map<VERTEXSHADERUID,VSCacheEntry> VSCache;
typedef std::map<VERTEXSHADERUID, VSCacheEntry> VSCache;
static VSCache vshaders;
static VERTEXSHADER* pShaderLast;