Reduced the frequency of DX9 ps_2_0 pixel shader generation errors, and made the DX9 EFB depth peek of a 16-bit depth buffer not use the 24-bit adjustment factor. Shouldn't affect the other plugins.

(probably nobody else cares, but I need at least one video plugin that actually works on this computer)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6618 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
nitsuja-
2010-12-19 20:59:23 +00:00
parent 47454a4ed9
commit 98fe8437ae
12 changed files with 200 additions and 166 deletions

View File

@ -44,25 +44,29 @@ static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
static std::set<u32> unique_shaders;
#define MAX_SSAA_SHADERS 3
// Selects how a copy shader emits the texel it sampled; also used as the
// first index into s_CopyProgram.
enum
{
	COPY_TYPE_DIRECT = 0,   // sampled texel is written to the output unchanged
	COPY_TYPE_MATRIXCOLOR,  // sampled texel is transformed by the cColMatrix constants
	NUM_COPY_TYPES          // number of copy types (array dimension)
};
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_CopyProgram[NUM_COPY_TYPES][PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES][MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0;
// Returns the color-matrix copy pixel shader for the requested SSAA mode.
// SSAAMode is reduced modulo MAX_SSAA_SHADERS before being used as an index.
// NOTE(review): two return statements appear back to back; only the first can
// execute. This looks like the pre-change and post-change lines of a diff shown
// together (s_ColorMatrixProgram vs. s_CopyProgram) - confirm which one is intended.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode)
{
return s_ColorMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS];
}
// Returns the depth-matrix copy pixel shader for the requested SSAA mode.
// In the two-argument form, depthConversionType is reduced modulo
// NUM_DEPTH_CONVERSION_TYPES and SSAAMode modulo MAX_SSAA_SHADERS before indexing.
// NOTE(review): two signature lines and two return statements appear here. This
// looks like the pre-change (one-argument, s_DepthMatrixProgram) and post-change
// (two-argument, s_CopyProgram) lines of a diff shown together - confirm which
// version is intended.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode)
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode, int depthConversionType)
{
return s_DepthMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][depthConversionType % NUM_DEPTH_CONVERSION_TYPES][SSAAMode % MAX_SSAA_SHADERS];
}
// Returns the plain (no color matrix) copy pixel shader for the requested SSAA mode.
// SSAAMode is reduced modulo MAX_SSAA_SHADERS before being used as an index.
// NOTE(review): two return statements appear back to back; only the first can
// execute. This looks like the pre-change and post-change lines of a diff shown
// together (s_ColorCopyProgram vs. s_CopyProgram) - confirm which one is intended.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode)
{
return s_ColorCopyProgram[SSAAMode % MAX_SSAA_SHADERS];
return s_CopyProgram[COPY_TYPE_DIRECT][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS];
}
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
@ -95,132 +99,120 @@ public:
}
};
#define WRITE p+=sprintf
// Generates the HLSL text for one copy / color-matrix / depth-matrix pixel shader
// variant and hands it to D3D::CompileAndCreatePixelShader.
//
// One generator covers every combination, so the individual programs no longer
// have to be spelled out in full for each case.
//
//   copyMatrixType       COPY_TYPE_MATRIXCOLOR passes the sampled texel through
//                        cColMatrix; any other value outputs the texel unchanged.
//   depthConversionType  reduced modulo NUM_DEPTH_CONVERSION_TYPES; chooses between
//                        no conversion and the 16-bit or 24-bit depth encoding.
//   SSAAMode             reduced modulo MAX_SSAA_SHADERS; chooses the shader inputs
//                        and how the source texture is sampled.
//
// Returns the result of D3D::CompileAndCreatePixelShader for the generated text.
static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConversionType, int SSAAMode)
{
	const bool useColorMatrix = (copyMatrixType == COPY_TYPE_MATRIXCOLOR);
	const int sampleVariant = SSAAMode % MAX_SSAA_SHADERS;
	const int depthVariant = depthConversionType % PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES;
	const char kCanary = 0x7C;

	char source[3072];
	setlocale(LC_NUMERIC, "C"); // use the "C" numeric locale while generating the text

	// The last byte is a sentinel: if it changes, the generated text overran the buffer.
	source[sizeof(source) - 1] = kCanary;
	char* cursor = source;

	// Header: identifying comment, sampler, and (if needed) the color matrix constants.
	WRITE(cursor, "// Copy/Color Matrix/Depth Matrix shader (matrix=%d, depth=%d, ssaa=%d)\n", copyMatrixType, depthConversionType, SSAAMode);
	WRITE(cursor, "uniform sampler samp0 : register(s0);\n");
	if (useColorMatrix)
	{
		WRITE(cursor, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX);
	}
	WRITE(cursor, "void main(\n"
		"out float4 ocol0 : COLOR0,\n");

	// Inputs and texel fetch. A variant outside 0..2 emits nothing here.
	if (sampleVariant == 0)
	{
		// single sample, one float2 texture coordinate
		WRITE(cursor, "in float2 uv0 : TEXCOORD0){\n"
			"float4 texcol = tex2D(samp0,uv0);\n");
	}
	else if (sampleVariant == 1)
	{
		// two float4 texture coordinates declared, single fetch at uv0.xy
		WRITE(cursor, "in float4 uv0 : TEXCOORD0,\n"
			"in float4 uv1 : TEXCOORD1){\n"
			"float4 texcol = tex2D(samp0,uv0.xy);\n");
	}
	else if (sampleVariant == 2)
	{
		// four fetches (uv1/uv2 .xy and .wz) averaged together
		WRITE(cursor, "in float4 uv0 : TEXCOORD0,\n"
			"in float4 uv1 : TEXCOORD1,\n"
			"in float4 uv2 : TEXCOORD2,\n"
			"in float4 uv3 : TEXCOORD3){\n"
			"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n");
	}

	// Optional depth re-encoding of texcol.r; DEPTH_CONVERSION_TYPE_NONE adds nothing.
	if (depthVariant == PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT)
	{
		// Probably not exactly right, but it behaves better than the 24-bit
		// conversion that used to be generated for this case.
		WRITE(cursor, "float4 EncodedDepth = frac((texcol.r * (65535.0f/65536.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
			"texcol = float4((EncodedDepth.rgb * (65536.0f/65535.0f)),1.0f);\n");
	}
	else if (depthVariant == PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT)
	{
		WRITE(cursor, "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
			"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n");
	}

	// Output: either the matrix-transformed texel or the texel as sampled.
	if (useColorMatrix)
	{
		WRITE(cursor, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n");
	}
	else
	{
		WRITE(cursor, "ocol0 = texcol;\n");
	}
	WRITE(cursor, "}\n");

	if (source[sizeof(source) - 1] != kCanary)
	{
		PanicAlert("PixelShaderCache copy shader generator - buffer too small, canary has been eaten!");
	}

	setlocale(LC_NUMERIC, ""); // switch back to the environment's numeric locale
	return D3D::CompileAndCreatePixelShader(source, (int)strlen(source));
}
void PixelShaderCache::Init()
{
//program used for clear screen
char pprog[3072];
sprintf(pprog, "void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float4 incol0 : COLOR0){\n"
"ocol0 = incol0;\n"
"}\n");
s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
{
char pprog[3072];
sprintf(pprog, "void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float4 incol0 : COLOR0){\n"
"ocol0 = incol0;\n"
"}\n");
s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
}
//Used for Copy/resolve the color buffer
//1 Sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float2 uv0 : TEXCOORD0){\n"
"ocol0 = tex2D(samp0,uv0);\n"
"}\n");
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants);
//1 Samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1){\n"
"ocol0 = tex2D(samp0,uv0.xy);\n"
"}\n");
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 Samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3){\n"
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25;\n"
"}\n");
s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Color conversion Programs
//1 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//1 samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3){\n"
"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Depth copy programs
//1 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//1 sample SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 sample SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3){\n"
"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
// other screen copy/convert programs
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
{
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
{
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
{
if(ssaaMode && !s_CopyProgram[copyMatrixType][depthType][ssaaMode-1]
|| depthType && !s_CopyProgram[copyMatrixType][depthType-1][ssaaMode]
|| copyMatrixType && !s_CopyProgram[copyMatrixType-1][depthType][ssaaMode])
{
// if it failed at a lower setting, it's going to fail here for the same reason it did there,
// so skip this attempt to avoid duplicate error messages.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
}
else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix)
{
// color matrix not supported, so substitute the nearest equivalent program that doesn't use it.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode];
}
else
{
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode);
}
}
}
}
Clear();
@ -248,15 +240,18 @@ void PixelShaderCache::Clear()
void PixelShaderCache::Shutdown()
{
for(int i = 0;i < MAX_SSAA_SHADERS; i++)
{
if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release();
s_ColorMatrixProgram[i] = NULL;
if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release();
s_ColorCopyProgram[i] = NULL;
if (s_DepthMatrixProgram[i]) s_DepthMatrixProgram[i]->Release();
s_DepthMatrixProgram[i] = NULL;
}
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
if(s_CopyProgram[copyMatrixType][depthType][ssaaMode]
&& (copyMatrixType == 0 || s_CopyProgram[copyMatrixType][depthType][ssaaMode] != s_CopyProgram[copyMatrixType-1][depthType][ssaaMode]))
s_CopyProgram[copyMatrixType][depthType][ssaaMode]->Release();
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
if (s_ClearProgram) s_ClearProgram->Release();
s_ClearProgram = NULL;
@ -296,8 +291,11 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
return (entry.shader != NULL);
}
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
// Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, components);
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, maxConstants, components);
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
unique_shaders.insert(code_hash);