Some small optimizations and some cleanup.

Lighting should now be fully functional again on SM 2.0 cards.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6794 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2011-01-09 14:13:24 +00:00
parent b7767b63a3
commit d47d2a05d2
13 changed files with 58 additions and 42 deletions

View File

@ -26,7 +26,7 @@
// shader cache for every revision, graphics-related or not, which is simply annoying.
enum
{
LINEAR_DISKCACHE_VER = 6777
LINEAR_DISKCACHE_VER = 6792
};
// On disk format:

View File

@ -101,21 +101,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path)
if (!useDstAlpha)
{
output = "Destination alpha disabled:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
else
{
if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
output = "Using dual source blending for destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
else
{
output = "Using two passes for emulating destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += "\n\nDestination alpha pass shader:\n";
output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
}

View File

@ -64,7 +64,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
((u32)bpmem.fog.c_proj_fsel.proj << 3) |
((u32)enableZTexture << 4);
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 2; ++i) {
uid->values[3 + i] = xfregs.colChans[i].color.enablelighting ?
@ -75,7 +75,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
(u32)xfregs.colChans[i].alpha.matsource) << 15;
}
}
uid->values[4] |= g_ActiveConfig.bEnablePixelLigting << 31;
uid->values[4] |= (g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
int hdr = 5;
u32 *pcurvalue = &uid->values[hdr];
@ -160,7 +160,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
// output is given by .outreg
// tevtemp is set according to swapmodetables and
static void WriteStage(char *&p, int n, API_TYPE ApiType,int maxUniforms);
static void WriteStage(char *&p, int n, API_TYPE ApiType);
static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
// static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p, API_TYPE ApiType);
@ -442,7 +442,7 @@ char *GeneratePixelLightShader(char *p, int index, const LitChannel& chan, const
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 maxUniforms, u32 components)
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
{
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
@ -504,13 +504,11 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
WRITE(p, "uniform float4 "I_ALPHA"[1] : register(c%d);\n", C_ALPHA);
WRITE(p, "uniform float4 "I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS);
WRITE(p, "uniform float4 "I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS);
if(C_INDTEXSCALE + 2 <= maxUniforms)
WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE);
if(C_INDTEXMTX + 6 <= maxUniforms)
WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX);
if(C_FOG + 2 <= maxUniforms)
WRITE(p, "uniform float4 "I_FOG"[2] : register(c%d);\n", C_FOG);
if(g_ActiveConfig.bEnablePixelLigting && C_PLIGHTS + 40 <= maxUniforms && C_PMATERIALS + 4 <= maxUniforms)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n");
WRITE(p,"typedef struct { Light lights[8]; } s_"I_PLIGHTS";\n");
@ -543,13 +541,13 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
for (int i = 0; i < numTexgen; ++i)
WRITE(p, ",\n in float3 uv%d : TEXCOORD%d", i, i);
WRITE(p, ",\n in float4 clipPos : TEXCOORD%d", numTexgen);
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, ",\n in float4 Normal : TEXCOORD%d", numTexgen + 1);
}
else
{
// wpos is in w of first 4 texcoords
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 8; ++i)
WRITE(p, ",\n in float4 uv%d : TEXCOORD%d", i, i);
@ -591,7 +589,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
" float2 wrappedcoord, tempcoord;\n"
" float4 cc0, cc1, cc2, cprev,crastemp,ckonsttemp;\n\n");
if(g_ActiveConfig.bEnablePixelLigting && C_PLIGHTS + 40 <= maxUniforms && C_PMATERIALS + 4 <= maxUniforms)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
if (xfregs.numTexGens < 7)
{
@ -751,7 +749,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{
int texcoord = bpmem.tevindref.getTexCoord(i);
if (texcoord < numTexgen && C_INDTEXSCALE + 2 <= maxUniforms)
if (texcoord < numTexgen)
WRITE(p, "tempcoord = uv%d.xy * "I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy");
else
WRITE(p, "tempcoord = float2(0.0f, 0.0f);\n");
@ -773,7 +771,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
}
for (int i = 0; i < numStages; i++)
WriteStage(p, i, ApiType,maxUniforms); //build the equation for this stage
WriteStage(p, i, ApiType); //build the equation for this stage
if(numStages)
{
@ -838,7 +836,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
WRITE(p, " ocol0 = float4(prev.rgb, "I_ALPHA"[0].a);\n");
else
{
if(C_FOG + 2 <= maxUniforms)
WriteFog(p);
WRITE(p, " ocol0 = prev;\n");
}
@ -907,7 +904,7 @@ static const char *TEVCMPAlphaOPTable[16] =
};
static void WriteStage(char *&p, int n, API_TYPE ApiType,int maxUniforms)
static void WriteStage(char *&p, int n, API_TYPE ApiType)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
@ -941,18 +938,18 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType,int maxUniforms)
// multiply by offset matrix and scale
if (bpmem.tevind[n].mid != 0)
{
if (bpmem.tevind[n].mid <= 3 && C_INDTEXMTX + 6 <= maxUniforms)
if (bpmem.tevind[n].mid <= 3)
{
int mtxidx = 2*(bpmem.tevind[n].mid-1);
WRITE(p, "float2 indtevtrans%d = float2(dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n",
n, mtxidx, n, mtxidx+1, n);
}
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord && C_INDTEXMTX + 6 <= maxUniforms)
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
{ // s matrix
int mtxidx = 2*(bpmem.tevind[n].mid-5);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
}
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord && C_INDTEXMTX + 6 <= maxUniforms)
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix
int mtxidx = 2*(bpmem.tevind[n].mid-9);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);

View File

@ -113,7 +113,7 @@ enum DSTALPHA_MODE
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
};
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 maxUniforms, u32 components);
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode);
extern PIXELSHADERUID last_pixel_shader_uid;

View File

@ -215,7 +215,7 @@ void PixelShaderManager::SetConstants()
s_bFogParamChanged = false;
}
if (g_ActiveConfig.bEnablePixelLigting && nLightsChanged[0] >= 0) // config check added because the code in here was crashing for me inside SetPSConstant4f
if (g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting && nLightsChanged[0] >= 0) // config check added because the code in here was crashing for me inside SetPSConstant4f
{
// lights don't have a 1 to 1 mapping, the color component needs to be converted to 4 floats
int istart = nLightsChanged[0] / 0x10;

View File

@ -45,7 +45,7 @@ void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
(u32)xfregs.colChans[i].alpha.hex :
(u32)xfregs.colChans[i].alpha.matsource) << 15;
}
uid->values[2] |= g_ActiveConfig.bEnablePixelLigting << 31;
uid->values[2] |= (g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
u32 *pcurvalue = &uid->values[3];
for (int i = 0; i < xfregs.numTexGens; ++i) {
TexMtxInfo tinfo = xfregs.texcoords[i].texmtxinfo;
@ -116,11 +116,11 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type)
for (int i = 0; i < xfregs.numTexGens; ++i)
WRITE(p, " float3 tex%d : TEXCOORD%d;\n", i, i);
WRITE(p, " float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGens);
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, " float4 Normal : TEXCOORD%d;\n", xfregs.numTexGens + 1);
} else {
// clip position is in w of first 4 texcoords
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 8; ++i)
WRITE(p, " float4 tex%d : TEXCOORD%d;\n", i, i);
@ -479,7 +479,7 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type)
WRITE(p, "o.tex3.w = o.pos.w;\n");
}
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
if (xfregs.numTexGens < 7) {
WRITE(p, "o.Normal = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n");

View File

@ -162,6 +162,7 @@ struct VideoConfig
bool bAllowSignedBytes; // D3D9 doesn't support signed bytes (?)
bool bSupportsDualSourceBlend; // only supported by D3D11 and OpenGL
bool bSupportsFormatReinterpretation;
bool bSupportsPixelLighting;
} backend_info;
};

View File

@ -354,7 +354,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
}
// Need to compile a new shader
const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, 65536, components);
const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components);
D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code, strlen(code), &pbytecode))

View File

@ -156,6 +156,7 @@ void InitBackendInfo()
g_Config.backend_info.bAllowSignedBytes = true;
g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsFormatReinterpretation = false;
g_Config.backend_info.bSupportsPixelLighting = true;
}
void DllConfig(void *_hParent)

View File

@ -94,9 +94,9 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::ReinterpRGBA6ToRGB8()
" out float4 ocol0 : COLOR0,\n"
" in float2 uv0 : TEXCOORD0){\n"
" ocol0 = tex2D(samp0,uv0);\n"
" float4 src6 = trunc(ocol0 * 63.f);\n"
" ocol0.r = src6.r*4.f + trunc(src6.g/16.f);\n" // dst8r = (src6r<<2)|(src6g>>4);
" ocol0.g = frac(src6.g/16.f)*16.f*16.f+trunc(src6.b/4.f);\n" // dst8g = ((src6g&0xF)<<4)|(src6b>>2);
" float4 src6 = floor(ocol0 * 63.f);\n"
" ocol0.r = src6.r*4.f + floor(src6.g/16.f);\n" // dst8r = (src6r<<2)|(src6g>>4);
" ocol0.g = frac(src6.g/16.f)*16.f*16.f+floor(src6.b/4.f);\n" // dst8g = ((src6g&0xF)<<4)|(src6b>>2);
" ocol0.b = frac(src6.b/4.f)*4.f*64.f+src6.a;\n" // dst8b = ((src6b&0x3)<<6)|src6a;
" ocol0.a = 255.f;\n"
" ocol0 /= 255.f;\n"
@ -108,6 +108,7 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::ReinterpRGBA6ToRGB8()
LPDIRECT3DPIXELSHADER9 PixelShaderCache::ReinterpRGB8ToRGBA6()
{
/* old code here for reference
const char code[] =
{
"uniform sampler samp0 : register(s0);\n"
@ -123,6 +124,19 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::ReinterpRGB8ToRGBA6()
" ocol0 /= 63.f;\n"
"}\n"
};
*/
const char code[] =
{
"uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float2 uv0 : TEXCOORD0){\n"
"float4 temp1 = float4(1.0f/4.0f,1.0f/16.0f,1.0f/64.0f,0.0f);\n"
"float4 temp2 = float4(1.0f,64.0f,255.0f,1.0f/63.0f);\n"
"float4 src8 = floor(tex2D(samp0,uv0)*temp2.z) * temp1;\n"
"ocol0 = (frac(src8.wxyz) * temp2.xyyy + src8) * temp2.w;\n"
"}\n"
};
if (!s_rgb8_to_rgba6) s_rgb8_to_rgba6 = D3D::CompileAndCreatePixelShader(code, (int)strlen(code));
return s_rgb8_to_rgba6;
}
@ -345,11 +359,10 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
return (entry.shader != NULL);
}
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
// Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, maxConstants, components);
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, components);
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
unique_shaders.insert(code_hash);

View File

@ -159,6 +159,9 @@ void InitBackendInfo()
g_Config.backend_info.bAllowSignedBytes = false;
g_Config.backend_info.bSupportsDualSourceBlend = false;
g_Config.backend_info.bSupportsFormatReinterpretation = true;
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants;
}
void DllConfig(void *_hParent)

View File

@ -227,7 +227,7 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp
PSCacheEntry& newentry = PixelShaders[uid];
newentry.frameCount = frameCount;
pShaderLast = &newentry.shader;
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, 65536, components);
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {

View File

@ -199,6 +199,7 @@ void InitBackendInfo()
g_Config.backend_info.bAllowSignedBytes = true;
g_Config.backend_info.bSupportsDualSourceBlend = false; // supported, but broken
g_Config.backend_info.bSupportsFormatReinterpretation = false;
g_Config.backend_info.bSupportsPixelLighting = true;
}
void DllConfig(void *_hParent)